Analysis of Coarse Grained A2 Simulations¶
1) Notebook Setup¶
1.1) Import Required Modules¶
import MDAnalysis as mda
import MDAnalysis.analysis.msd as msd
import MDAnalysis.analysis.rdf
from MDAnalysis import transformations
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.lines as mlines
import pandas as pd
import scipy.stats
from scipy.signal import find_peaks
import math
import pandas as pd
import re
%matplotlib inline
1.2) Plot Parameters¶
# Restore every matplotlib setting to its default before customizing.
mpl.rcParams.update(mpl.rcParamsDefault)
# Apply the notebook-wide font and line-width settings in one batch.
mpl.rcParams.update({
    "font.size": 20,
    "axes.labelsize": 30,
    "lines.linewidth": 2.3,
    "font.family": "Serif",
})
1.3) Analysis Parameters¶
# Load trajectory
# The .tpr file supplies the topology, the .xtc the re-imaged coordinates.
u = mda.Universe('trajectory/image1.tpr', 'trajectory/reimaged1.xtc')
# Divisor used to track the progress of analysis
# (one progress tick per 10% of the trajectory's frames).
track_divisor = int(len(u.trajectory) / 10)
track_divisor
1500
2) Preparation¶
Collect some data that will be used throughout and define a few functions for facilitating analysis.
2.1) Get Time¶
Get and save the time so that it can be used during plotting.
2.1.1) Load Trajectory, Collect, and Save Time¶
## Load trajectory
#u = mda.Universe('trajectory/image1.tpr', 'trajectory/reimaged1.xtc')
#ii = 0
#time = []
#for ts in u.trajectory:
# if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
# time.append(u.trajectory.time)
# ii += 1
#
## Convert time units from ps to us
#time = np.array(time)
#time
#time = pd.DataFrame(time)
#time.to_csv("analysis/time/time.csv")
2.1.2) Quick Import After First Time¶
# Re-load the per-frame times saved by the (commented-out) cell above.
time = pd.read_csv("analysis/time/time.csv")
# Keep only the "Time" column as a plain numpy array.
# Units are ps; later plots divide by 10**6 to display µs.
time = np.array(time["Time"])
time
array([0.0000e+00, 1.0000e+03, 2.0000e+03, ..., 1.4998e+07, 1.4999e+07,
1.5000e+07])
2.2) Function Definitions¶
2.2.1) Import All Trajectories¶
# A dictionary containing all of the trajectories is returned.
# Build a dictionary mapping "trajectoryN" -> MDAnalysis Universe for the
# five replicate simulations.
def get_trajectories():
    trajectories = {}
    for idx in range(1, 6):
        tpr = "trajectory/image" + str(idx) + ".tpr"
        xtc = "trajectory/reimaged" + str(idx) + ".xtc"
        trajectories["trajectory" + str(idx)] = mda.Universe(tpr, xtc)
    return trajectories
2.2.2) Iterate Analysis over Trajectories¶
# Generic iterator to perform a given analysis on each trajectory
# and return the results as a dictionary.
# Takes
# (1) A function to perform the analysis.
# (2) The analysis dictionary.
# Generic iterator: run `analysis` on every Universe in `trajectories`
# and return the per-trajectory results as a dictionary keyed identically.
# Takes
#   (1) A function performing the analysis on one Universe.
#   (2) The trajectory dictionary (name -> Universe).
def analyze_trajectories(analysis, trajectories):
    return {name: analysis(universe)
            for name, universe in trajectories.items()}
2.2.3) Iterate Plotting/Analysis Function Over Trajectory¶
# Generic iterator to perform a given analysis on each trajectory
# and return the results as a dictionary.
# Takes
# (1) A function to perform the analysis.
# (2) The analysis dictionary.
# Generic iterator: run a presentation/plotting function on each
# trajectory's analysis results.  Output is displayed, not returned.
# Takes
#   (1) A function that plots/prints one trajectory's results.
#   (2) The analysis dictionary (name -> data).
def present_analysis(presentation, trajectories):
    for traj in trajectories:
        # Fixed typo in the printed message: "correcpond" -> "correspond".
        print("The following plots correspond to:", traj)
        presentation(trajectories[traj])
3) Eccentricity¶
3.1) Necessary Functions¶
3.1.1) Calculate Moments of Inertia¶
# Provide an mdanalysis universe
# Calculate the bilayer's moment of inertia about each cartesian axis
# for every frame of the trajectory.
# Provide an mdanalysis universe
def calculate_moi(u):
    # Create selections for groups that may be used for analysis
    bilayer = u.select_atoms("resname POPC or resname POPS or " + \
                             "resname POP2 or resname CHOL")
    ii = 0 # Used to track calculation progress
    # Moment of inertia about each cartesian axis
    Ix = []
    Iy = []
    Iz = []
    # N x 3 mass matrix for faster multiplication
    # NOTE(review): bilayer.positions returns a copy; every column is then
    # overwritten with the (frame-independent) masses, so `mass` ends up
    # holding each atom's mass repeated in all three columns.
    mass = bilayer.positions
    mass[:, 0] = bilayer.masses
    mass[:, 1] = bilayer.masses
    mass[:, 2] = bilayer.masses
    # Iterate over all timesteps
    for ts in u.trajectory:
        # Calculate Ix, Iy, Iz from COM-centred coordinates:
        # I_x = sum(m * (y^2 + z^2)), and cyclic permutations.
        bilayer_coordinates = bilayer.positions - \
                              bilayer.center_of_mass()
        position_squared_times_mass = bilayer_coordinates * \
                                      bilayer_coordinates * \
                                      mass
        Ix.append(np.sum(position_squared_times_mass[:, 1] + \
                         position_squared_times_mass[:, 2]))
        Iy.append(np.sum(position_squared_times_mass[:, 0] + \
                         position_squared_times_mass[:, 2]))
        Iz.append(np.sum(position_squared_times_mass[:, 0] + \
                         position_squared_times_mass[:, 1]))
        # Check progress (prints 0% .. 100% via module-level track_divisor)
        if ii % track_divisor == 0:
            print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
        ii += 1
    # Per-frame lists, in axis order x, y, z.
    moi_results = [Ix, Iy, Iz]
    print(' ')
    return(moi_results)
3.1.2) Presenting Eccentricity Results¶
# Print eccentricity statistics for one trajectory and plot Ix/Iy/Iz
# against time, with dashed guides at I_min (red) and I_avg (black).
def present_moi_results(moi_data):
    Ix, Iy, Iz = moi_data[0], moi_data[1], moi_data[2]
    # Pool all three axes before taking min/average.
    pooled = np.array(Ix + Iy + Iz)
    I_min = np.min(pooled)
    I_avg = np.average(pooled)
    # Eccentricity as deviation of the smallest moment from the mean.
    eccentricity = 1 - (I_min / I_avg)
    print("Imin is: %f" % I_min)
    print("Iavg is: %f" % I_avg)
    print("The eccentricity of the vesicle is: %f" % eccentricity)
    fig, ax = plt.subplots()
    # Hide the top and right spines for a cleaner frame.
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)
    fig.set_size_inches(7.5, 4)
    # Time axis converted from ps to µs.
    t_us = time / 10**6
    for series, label in ((Ix, "Ix"), (Iy, "Iy"), (Iz, "Iz")):
        ax.plot(t_us, series, label = label)
    ax.axhline(I_min, color = "red", linestyle = "--")
    ax.axhline(I_avg, color = "black", linestyle = "--")
    ax.set_xlabel("Time (µs)")
    ax.set_ylabel("MOI (amu Ų)")
    ax.legend(loc = "upper right")
    plt.show()
3.1.3) Combined Plot¶
# Plot the per-axis moments of inertia for all five trajectories on one
# 3 x 4 grid figure and print each trajectory's eccentricity.
def combined_moi_plot(moi_results):
    fig = plt.figure()
    # Adjust figure size.
    fig.set_size_inches(27, 21)
    spec = mpl.gridspec.GridSpec(ncols=4, nrows=3)
    # Rows 0-1 hold two double-width panels each; row 2 holds the fifth
    # panel, centred.
    ax1 = fig.add_subplot(spec[0,0:2])
    ax2 = fig.add_subplot(spec[0,2:4])
    ax3 = fig.add_subplot(spec[1,0:2])
    ax4 = fig.add_subplot(spec[1,2:4])
    ax5 = fig.add_subplot(spec[2,1:3])
    axes = [ax1, ax2, ax3, ax4, ax5]
    for ii in range(0, 5):
        trajectory = "trajectory" + str(ii + 1)
        plot_data = moi_results[trajectory]
        # Collect data
        Ix, Iy, Iz = plot_data[0], plot_data[1], plot_data[2]
        I_all = np.array(Ix + Iy + Iz)
        I_min = np.min(I_all)
        I_avg = np.average(I_all)
        # Calculate eccentricity
        eccentricity = 1 - (I_min / I_avg)
        print("Imin is: %f" % I_min)
        print("Iavg is: %f" % I_avg)
        print("The eccentricity of the vesicle is: %f" % eccentricity)
        # Remove top and right spines from the figure.
        axes[ii].spines["top"].set_visible(False)
        axes[ii].spines["right"].set_visible(False)
        # Plot data (time converted from ps to µs).
        axes[ii].plot(time / 10**6, Ix, label = "Ix")
        axes[ii].plot(time / 10**6, Iy, label = "Iy")
        axes[ii].plot(time / 10**6, Iz, label = "Iz")
        axes[ii].axhline(I_min, color = "red", linestyle = "--")
        # BUGFIX: this text sits just above the red I_min line (offset
        # 1.5e8 is far smaller than I_avg - I_min), so it must read
        # I_min; it previously said I_avg.
        axes[ii].text(0., I_min + 150000000, '$I_{min}$')
        axes[ii].axhline(I_avg, color = "black", linestyle = "--")
        # Set labels
        axes[ii].set_xlabel("Time (µs)")
        axes[ii].set_ylabel("MOI (amu Ų)")
        axes[ii].legend(loc = "upper right")
    plt.tight_layout()
    plt.show()
3.2) Re-Initialize Data and Perform Calculations¶
#trajectories = get_trajectories()
#moi_results = analyze_trajectories(calculate_moi, trajectories)
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
3.3) Save Results¶
#for key in moi_results:
# data = {}
# data["Ix"] = moi_results[key][0]
# data["Iy"] = moi_results[key][1]
# data["Iz"] = moi_results[key][2]
# data = pd.DataFrame.from_dict(data)
# data.to_csv("analysis/eccentricity/" + key + ".csv", index = False)
3.4) Read Saved Results¶
# Re-load the saved per-axis moment-of-inertia data for all five
# trajectories into the same dict-of-three-lists layout produced by
# analyze_trajectories(calculate_moi, ...).
prefix = "analysis/eccentricity/"
moi_results = {}
for run in range(1, 6):
    name = "trajectory" + str(run)
    frame = pd.read_csv(prefix + name + ".csv")
    # Columns are Ix, Iy, Iz in order.
    moi_results[name] = [list(frame.iloc[:, col]) for col in range(3)]
3.5) Plot Moments of Inertia¶
# Per-trajectory plots (disabled in favor of the combined figure):
#present_analysis(present_moi_results, moi_results)
combined_moi_plot(moi_results)
Imin is: 25105518893.873730 Iavg is: 26883445986.242313 The eccentricity of the vesicle is: 0.066135 Imin is: 25040613151.147152 Iavg is: 26800202812.448631 The eccentricity of the vesicle is: 0.065656 Imin is: 24477737596.515190 Iavg is: 26653219079.009644 The eccentricity of the vesicle is: 0.081622 Imin is: 24894967126.129841 Iavg is: 26731160513.143406 The eccentricity of the vesicle is: 0.068691 Imin is: 24453023185.326435 Iavg is: 26607182251.565849 The eccentricity of the vesicle is: 0.080962
3.6) Average Eccentricity¶
# Eccentricity values reported for the five trajectories above.
eccentricities = [0.066135, 0.065656, 0.081622, 0.068691, 0.080962]
mean = np.mean(eccentricities)
std = np.std(eccentricities)
# Standard error of the mean over the five replicates.
stderr_x2 = std / np.sqrt(5.)
print("The mean eccentricity is:", mean, '±', stderr_x2)
The mean eccentricity is: 0.0726132 ± 0.003203837067018234
4) Radial Distribution for Membrane Dimensions¶
4.1) Necessary Functions¶
4.1.1) Universe Transformation to Switch an Atom's Coordinates with the System's COM¶
# On-the-fly trajectory transformation: every frame, overwrite the
# position of the atoms matched by `selection` with the bilayer's
# current center of mass.
class replace_with_COM:
    """Pin the atoms in `selection` to the bilayer's center of mass."""
    def __init__(self, bilayer, selection):
        # Group whose COM is tracked, and the atom(s) to move onto it.
        self.bilayer = bilayer
        self.com_atoms = bilayer.select_atoms(selection)
    def get_com(self):
        """Return the bilayer's current center of mass."""
        return self.bilayer.center_of_mass()
    def __call__(self, ts):
        # Transformations receive and return the Timestep unchanged
        # apart from the position overwrite.
        com = self.get_com()
        self.com_atoms.positions = np.array([list(com)])
        return ts
4.1.2) Radial Distribution Function Calculation¶
# Radial distribution function of lipid head-group beads measured from
# the vesicle (bilayer) center of mass.
def calculate_rdf(u):
    # Fresh Universe so the transformation added below does not
    # accumulate on the caller's trajectory.
    ucom = mda.Universe(u.filename, u.trajectory.filename)
    # Create selections for groups that may be used for analysis
    selection = "(resname POPC and name PO4 NC3) or " + \
                "(resname POPS and name PO4 CNO) or " + \
                "(resname POP2 and name P1 P2 C1 C2 C3 PO4)"
    head_group = ucom.select_atoms(selection)
    bilayer = ucom.select_atoms("resname POPC or resname POPS or " + \
                                "resname POP2 or resname CHOL")
    # A sacrificial atom (resnum 1, name C4A) is re-positioned onto the
    # bilayer COM every frame so InterRDF effectively measures distances
    # from the vesicle center.
    selection = "resnum 1 and name C4A"
    ucom.trajectory.add_transformations(replace_with_COM(bilayer, selection))
    selection = "resnum 1 and name C4A"
    com_atoms = ucom.select_atoms(selection)
    # 1000 bins spanning 65-160 from the center.
    rdf = mda.analysis.rdf.InterRDF(com_atoms, head_group, nbins = 1000, range = (65., 160.), verbose = True)
    rdf.run(step = 1)
    return(rdf.results)
4.1.3) Present RDF Results¶
# Plot one trajectory's head-group RDF, mark the two leaflet peaks
# (located with scipy's find_peaks) and annotate the membrane thickness.
def present_rdf_results(rdf_data):
    # Two broad peaks expected: inner leaflet first, outer leaflet second.
    peaks, _ = find_peaks(rdf_data["rdf"], width = 40)
    rdf_min = rdf_data["rdf"][peaks[0]]    # g(r) at the inner-leaflet peak
    rdf_max = rdf_data["rdf"][peaks[1]]    # g(r) at the outer-leaflet peak
    dist_min = rdf_data["bins"][peaks[0]]  # inner-leaflet radius
    dist_max = rdf_data["bins"][peaks[1]]  # outer-leaflet radius
    dist_mid = (dist_min + dist_max) / 2
    # Peak-to-peak separation, taken as the membrane thickness.
    thickness = dist_max - dist_min
    fig, ax = plt.subplots(figsize = (9.5, 4))
    mpl.rcParams['axes.facecolor'] = (1,1,1,0)
    ax.plot(rdf_data["bins"], rdf_data["rdf"])
    ax.yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%.1f'))
    # BUGFIX: the angstrom sign in this label was mojibake ("Ã…").
    ax.set_xlabel("Distance from Vesicle Center (Å)")
    ax.set_ylabel("g(r)")
    # Dashed guides at both peaks plus the peak-to-peak span.
    ax.plot((dist_min, dist_min), (0., rdf_min),
            color = "tab:blue", linestyle = "--")
    ax.plot((dist_max, dist_max), (0., rdf_max),
            color = "tab:blue", linestyle = "--")
    ax.plot((dist_min, dist_max), (rdf_max, rdf_max),
            color = "tab:blue", linestyle = "--")
    ax.text(dist_max + 1.2, rdf_max - 1, str(np.round(dist_max, 2)))
    ax.text(dist_min + 1.2, rdf_min - 1, str(np.round(dist_min, 2)))
    ax.text(dist_mid - 5, rdf_max + 0.5, str(np.round(thickness, 2)))
    plt.show()
4.1.4) Combined RDF Plot¶
# Plot the head-group RDFs for all five trajectories on one 3 x 4 grid
# figure, annotating leaflet radii and membrane thickness in each panel.
def combined_rdf_plot(rdf_results):
    fig = plt.figure()
    # Adjust figure size.
    fig.set_size_inches(30, 21)
    spec = mpl.gridspec.GridSpec(ncols=4, nrows=3)
    # Rows 0-1 hold two double-width panels each; row 2 holds the fifth
    # panel, centred.
    ax1 = fig.add_subplot(spec[0,0:2])
    ax2 = fig.add_subplot(spec[0,2:4])
    ax3 = fig.add_subplot(spec[1,0:2])
    ax4 = fig.add_subplot(spec[1,2:4])
    ax5 = fig.add_subplot(spec[2,1:3])
    axes = [ax1, ax2, ax3, ax4, ax5]
    for ii in range(0, 5):
        trajectory = "trajectory" + str(ii + 1)
        plot_data = rdf_results[trajectory]
        # Two broad peaks: inner leaflet first, outer leaflet second.
        peaks, _ = find_peaks(plot_data["rdf"], width = 40)
        rdf_min = plot_data["rdf"][peaks[0]]
        rdf_max = plot_data["rdf"][peaks[1]]
        dist_min = plot_data["bins"][peaks[0]]
        dist_max = plot_data["bins"][peaks[1]]
        dist_mid = (dist_min + dist_max) / 2
        thickness = dist_max - dist_min
        axes[ii].plot(plot_data["bins"], plot_data["rdf"])
        axes[ii].yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%.1f'))
        # BUGFIX: the angstrom sign in this label was mojibake ("Ã…").
        axes[ii].set_xlabel("Distance from Vesicle Center (Å)")
        axes[ii].set_ylabel("g(r)")
        # Dashed guides at both peaks plus the peak-to-peak span.
        axes[ii].plot((dist_min, dist_min), (0., rdf_min),
                      color = "tab:blue", linestyle = "--")
        axes[ii].plot((dist_max, dist_max), (0., rdf_max),
                      color = "tab:blue", linestyle = "--")
        axes[ii].plot((dist_min, dist_max), (rdf_max, rdf_max),
                      color = "tab:blue", linestyle = "--")
        axes[ii].text(dist_max + 1.2, rdf_max - 1, str(np.round(dist_max, 2)))
        axes[ii].text(dist_min + 1.2, rdf_min - 1, str(np.round(dist_min, 2)))
        axes[ii].text(dist_mid - 5, rdf_max + 0.5, str(np.round(thickness, 2)))
    plt.tight_layout()
    plt.show()
4.2) Perform Analysis¶
#trajectories = get_trajectories()
#rdf_results = analyze_trajectories(calculate_rdf, trajectories)
0%| | 0/15001 [00:00<?, ?it/s]
0%| | 0/15001 [00:00<?, ?it/s]
0%| | 0/15001 [00:00<?, ?it/s]
0%| | 0/15001 [00:00<?, ?it/s]
0%| | 0/15001 [00:00<?, ?it/s]
4.3) Save Results¶
#for key in rdf_results:
# data = {}
# data["RDF"] = rdf_results[key]["rdf"]
# data["Location"] = rdf_results[key]["bins"]
# data = pd.DataFrame.from_dict(data)
# data.to_csv("analysis/rdf/" + key + ".csv", index = False)
4.4) Read Saved Results¶
# Re-load the saved RDF curves.  In the saved CSVs g(r) was written as
# the first column and the bin locations as the second.
prefix = "analysis/rdf/vesi_dimensions/"
rdf_results = {}
for run in range(1, 6):
    name = "trajectory" + str(run)
    table = pd.read_csv(prefix + name + ".csv")
    rdf_results[name] = {"bins": list(table.iloc[:, 1]),
                         "rdf": list(table.iloc[:, 0])}
4.5) Present Results¶
# Per-trajectory plots (disabled in favor of the combined figure):
#present_analysis(present_rdf_results, rdf_results)
combined_rdf_plot(rdf_results)
4.6) Average Vesicle Dimensions¶
Supplemental collection of data for usage in later analysis.
# Leaflet radii and thicknesses read off the RDF peak annotations above.
upper_radii = [122.81, 122.52, 122.43, 122.14, 122.24]
lower_radii = [81.48, 81.10, 80.63, 81.29, 80.63]
thickness = [41.32, 41.42, 41.80, 40.85, 41.61]
# Means and standard errors over the five replicates.
mean_upper_rad = np.mean(upper_radii)
stderr_upper_rad = np.std(upper_radii) / np.sqrt(5.)
mean_lower_rad = np.mean(lower_radii)
stderr_lower_rad = np.std(lower_radii) / np.sqrt(5.)
mean_thickness = np.mean(thickness)
stderr_thickness = np.std(thickness) / np.sqrt(5.)
# Report each mean ± 2 x standard error.
print("The mean upper radius is:", mean_upper_rad, '±', stderr_upper_rad * 2.)
print("The mean lower radius is:", mean_lower_rad, '±', stderr_lower_rad * 2.)
print("The mean thickness is:", mean_thickness, '±', stderr_thickness * 2.)
5) Average Protein Height¶
5.1) Necessary Functions¶
5.1.1) Average Outer Leaflet Radius by Frame¶
# Per-frame mean radius of the outer (upper) leaflet head groups.
# Leaflet membership is decided once, from the first frame, by splitting
# the bimodal distribution of head-group distances from the bilayer COM.
def outer_leaflet_height(u):
    # Create selections for groups that may be used for analysis
    selection = "(resname POPC and name PO4 NC3) or " + \
                "(resname POPS and name PO4 CNO) or " + \
                "(resname POP2 and name P1 P2 C1 C2 C3 PO4)"
    head_group = u.select_atoms(selection)
    # Create selections for groups that may be used for analysis
    bilayer = u.select_atoms("resname POPC or resname POPS or " + \
                             "resname POP2 or resname CHOL")
    # Collect head group centers of mass.
    # NOTE(review): n_frames + 1 rows are allocated but only n_frames are
    # ever filled; the final np.empty row is unused.
    head_group_com = np.empty((int(u.trajectory.n_frames) + 1, head_group.n_residues, 3))
    ii = 0
    for ts in u.trajectory:
        # Check progress
        if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
        # Shift positions so the COMs below are relative to the bilayer COM.
        head_group.positions = head_group.positions - bilayer.center_of_mass()
        head_group_com[ii, :] = head_group.center_of_mass(compound = 'residues')
        ii += 1
    print(' ')
    # Get the magnitude of the COM vectors (first frame only).
    dist_from_center = np.sqrt(np.sum(head_group_com[0, :, :] * head_group_com[0, :, :], axis = 1))
    dist_from_center.sort()
    # Distribution should be bimodal with a healthy split between the distributions.
    # We can exploit this to determine at exactly what point the split occurs
    max_diff = np.max(dist_from_center[1:] - dist_from_center[0:-1])
    diff = dist_from_center[1:] - dist_from_center[0:-1]
    ii = 0
    # Locate the largest gap between consecutive sorted distances; the
    # element just above the gap is taken as the first outer-leaflet lipid.
    # NOTE(review): assumes the final `ii += 1` runs after the loop break,
    # i.e. lower_bound = index-of-max-gap + 1 — confirm against the
    # original notebook's indentation.
    for ii in range(len(diff)):
        if diff[ii] == max_diff:
            break
        ii += 1
    ii += 1
    lower_bound = ii
    # Use the determined lower-bound for the upper leaflet lipids to
    # generate a list for selecting the lipids of the upper leaflet
    upper_leaflet_min = dist_from_center[lower_bound]
    # Recompute the unsorted distances so indices align with residue order.
    dist_from_center = np.sqrt(np.sum(head_group_com[0, :, :] * head_group_com[0, :, :], axis = 1))
    upper_selection = dist_from_center >= upper_leaflet_min
    upper_selection = [ii for ii in range(len(upper_selection)) if upper_selection[ii]]
    # Define the upper-leaflet lipids
    upper_head_group_com = head_group_com[:, upper_selection, :]
    upper_leaf_radius = []
    ii = 0
    # NOTE(review): this second pass only indexes the cached COM array —
    # iterating the trajectory again merely serves as a frame counter.
    for ts in u.trajectory:
        # Check progress
        if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
        radius = np.linalg.norm(upper_head_group_com[ii, :, :], axis = 1)
        radius = np.mean(radius)
        upper_leaf_radius.append(radius)
        ii += 1
    print(' ')
    upper_leaf_radius = np.array(upper_leaf_radius)
    return(upper_leaf_radius)
5.1.2) Calculate Average Protein Height¶
# Mean A2 height above the outer leaflet, per frame.
# Returns a 1D array with one value per frame: the mean of the ten
# protein COM radii minus that frame's mean outer-leaflet radius.
def average_protein_height(u):
    upper_leaf_radius = outer_leaflet_height(u)
    # Generate Protein ids and selections
    selection = "(not resname POPC) and " + \
                "(not resname POPS) and " + \
                "(not resname POP2) and " + \
                "(not resname CHOL)"
    # Create selections for groups that may be used for analysis
    protein = u.select_atoms(selection)
    # Build GROMACS index-file lines, one group per A2 monomer, with 15
    # atom ids per line.  (The actual file write below is disabled.)
    letters = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
    letter_to_molid = dict(zip(letters, list(set(protein.molnums))))
    lines = []
    for let in letter_to_molid:
        lines.append("[ " + let + " ]" + '\n')
        sele_ids = protein.select_atoms("molnum " + str(letter_to_molid[let])).ids
        # NOTE(review): if a monomer had fewer than 15 atoms this inner
        # loop would not run and `kk` below would be stale/undefined.
        for ii in range(len(sele_ids) // 15):
            jj = ii * 15
            kk = (ii + 1) * 15
            lines.append(' '.join(list(sele_ids[jj:kk].astype(str))) + '\n')
        jj = kk
        # BUGFIX: was len(sele_ids % 15) — the length of an element-wise
        # mod array, i.e. len(sele_ids); the slice only produced the
        # right remainder because numpy clamps out-of-range slice ends.
        kk = jj + len(sele_ids) % 15
        lines.append(' '.join(list(sele_ids[jj:kk].astype(str))) + '\n')
    #with open("analysis/distance/scripts/index.ndx", 'w') as f:
    #    for l in lines:
    #        f.writelines(l)
    #--------------------------------------------------------------------
    bilayer = u.select_atoms("resname POPC or " + \
                             "resname POPS or " + \
                             "resname POP2 or " + \
                             "resname CHOL")
    # Create selections for groups that may be used for analysis
    protein = u.select_atoms("(not resname POPC) and " + \
                             "(not resname POPS) and " + \
                             "(not resname POP2) and " + \
                             "(not resname CHOL)")
    #--------------------------------------------------------------------
    # Per-frame COM of each of the 10 A2 molecules, relative to the
    # bilayer COM.  (The +1 row is allocated but never filled.)
    protein_com = np.empty((int(u.trajectory.n_frames) + 1, 10, 3))
    ii = 0
    for ts in u.trajectory:
        # Check progress
        if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
        protein.positions = protein.positions - bilayer.center_of_mass()
        protein_com[ii, :] = protein.center_of_mass(compound = 'molecules')
        ii += 1
    print(' ')
    #--------------------------------------------------------------------
    height = []
    ii = 0
    for ts in u.trajectory:
        # Check progress
        if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
        # COM radii of the ten proteins for this frame.
        protein_height = np.linalg.norm(protein_com[ii, :, :], axis = 1)
        # (A redundant self-assignment was removed here.)
        protein_height = np.mean(protein_height) - upper_leaf_radius[ii]
        height.append(protein_height)
        ii += 1
    print(' ')
    height = np.array(height)
    return(height)
5.1.3) Present Mean A2 Height¶
# Plot one trajectory's mean A2 height against time, with a dashed line
# at the mean over the equilibrated portion (frames 4001+).
def present_mean_height(height):
    # Initialize plotting.
    fig, ax = plt.subplots()
    # Remove top and right spines from the figure.
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    # Adjust figure size.
    fig.set_size_inches(15, 4)
    # Plot data (time converted from ps to µs).
    ax.plot(time / 10 ** 6, height)
    # Average over the equilibrated portion only.
    mean_height = np.mean(height[4001:])
    ax.axhline(mean_height, linestyle = "dashed",
               color = "orange")
    mean_height_str = str(np.round(mean_height, 2))
    ax.text(0., mean_height - 4., mean_height_str)
    # Set labels
    ax.set_xlabel("Time (µs)")
    # BUGFIX: the angstrom sign in this label was mojibake ("Ã…").
    ax.set_ylabel("Protein\nHeight (Å)")
    plt.show()
5.1.4) Combined Protein Height Plot¶
# Plot mean A2 height vs. time for all five trajectories on one 3 x 4
# grid figure, with dashed lines at the equilibrated means.
def combined_average_height_plot(height):
    fig = plt.figure()
    # Adjust figure size.
    fig.set_size_inches(30, 21)
    spec = mpl.gridspec.GridSpec(ncols=4, nrows=3)
    # Rows 0-1 hold two double-width panels each; row 2 holds the fifth
    # panel, centred.
    ax1 = fig.add_subplot(spec[0,0:2])
    ax2 = fig.add_subplot(spec[0,2:4])
    ax3 = fig.add_subplot(spec[1,0:2])
    ax4 = fig.add_subplot(spec[1,2:4])
    ax5 = fig.add_subplot(spec[2,1:3])
    axes = [ax1, ax2, ax3, ax4, ax5]
    for ii in range(0, 5):
        trajectory = "trajectory" + str(ii + 1)
        plot_data = height[trajectory]
        # Remove top and right spines from the figure.
        axes[ii].spines["top"].set_visible(False)
        axes[ii].spines["right"].set_visible(False)
        # Plot data (time converted from ps to µs).
        axes[ii].plot(time / 10 ** 6, plot_data)
        # Average over the equilibrated portion (frames 4001+) only.
        mean_height = np.mean(plot_data[4001:])
        axes[ii].axhline(mean_height, linestyle = "dashed",
                         color = "orange")
        mean_height_str = str(np.round(mean_height, 2))
        axes[ii].text(0., mean_height - 4., mean_height_str)
        # Set labels
        axes[ii].set_xlabel("Time (µs)")
        # BUGFIX: the angstrom sign in this label was mojibake ("Ã…").
        axes[ii].set_ylabel("Protein\nHeight (Å)")
    plt.tight_layout()
    plt.show()
5.2) Perform Analysis¶
#trajectories = get_trajectories()
#mean_height_results = analyze_trajectories(average_protein_height,
# trajectories)
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
5.3) Save Results¶
#for key in mean_height_results:
# data = {}
# data["Mean Height"] = mean_height_results[key]
# data = pd.DataFrame.from_dict(data)
# data.to_csv("analysis/mean_height/" + key + ".csv", index = False)
5.4) Read Saved Results¶
# Re-load the saved mean protein-height time series for each trajectory.
prefix = "analysis/mean_height/"
mean_height_results = {}
for run in range(1, 6):
    name = "trajectory" + str(run)
    table = pd.read_csv(prefix + name + ".csv")
    mean_height_results[name] = list(table.iloc[:, 0])
5.5) Present Results¶
# Per-trajectory plots (disabled in favor of the combined figure):
#present_analysis(present_mean_height, mean_height_results)
combined_average_height_plot(mean_height_results)
6) Individual Protein Heights¶
6.1) Necessary Functions¶
6.1.1) Individual Protein Height Calculation¶
# Per-frame height of each individual A2 above the outer leaflet.
# Returns an (n_frames, 10) array: each protein's COM radius minus the
# frame's mean outer-leaflet radius.
def individual_protein_height(u):
    upper_leaf_radius = outer_leaflet_height(u)
    # Create selections for groups that may be used for analysis
    selection = "(not resname POPC) and " + \
                "(not resname POPS) and " + \
                "(not resname POP2) and " + \
                "(not resname CHOL)"
    protein = u.select_atoms(selection)
    # Create selections for groups that may be used for analysis
    selection = "resname POPC or resname POPS or " + \
                "resname POP2 or resname CHOL"
    bilayer = u.select_atoms(selection)
    #--------------------------------------------------------------------
    # Per-frame COM of each of the 10 A2 molecules, relative to the
    # bilayer COM.
    # NOTE(review): n_frames + 1 rows are allocated but only n_frames are
    # ever filled; the final np.empty row is unused.
    protein_com = np.empty((int(u.trajectory.n_frames) + 1, 10, 3))
    ii = 0
    for ts in u.trajectory:
        # Check progress
        if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
        # Shift positions so the norms below are distances from the
        # bilayer COM.
        protein.positions = protein.positions - bilayer.center_of_mass()
        protein_com[ii, :] = protein.center_of_mass(compound = 'molecules')
        ii += 1
    print(' ')
    #--------------------------------------------------------------------
    height = []
    ii = 0
    # NOTE(review): this pass only indexes the cached COM array —
    # iterating the trajectory again merely serves as a frame counter.
    for ts in u.trajectory:
        # Check progress
        if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
        protein_height = np.linalg.norm(protein_com[ii, :, :], axis = 1)
        protein_height = protein_height - upper_leaf_radius[ii]
        height.append(protein_height)
        ii += 1
    print(' ')
    height = np.array(height)
    return(height)
6.1.2) Present Individual A2 Heights¶
# Plot each of the ten A2 monomers' height time series on a 5 x 2 grid
# and print each monomer's equilibrated (frames 4001+) mean height.
def present_individual_height(height):
    # Silence numpy floating-point warnings for these plots.
    # NOTE(review): np.seterr is a global, persistent setting.
    np.seterr(all = "ignore")
    # Panel titles A-J, one per monomer.
    title = {0 : 'A', 1 : 'B', 2 : 'C', 3 : 'D', 4 : 'E',
             5 : 'F', 6 : 'G', 7 : 'H', 8 : 'I', 9 : 'J'}
    fig, axs = plt.subplots(5, 2, figsize=(25, 30), sharey = True)
    kk = 0
    for jj in range(0, 5):
        for ii in range(0, 2):
            # Time converted from ps to µs.
            axs[jj, ii].plot(time / 10 ** 6, height[kk, :], label = "POPC")
            axs[jj, ii].set_title(title[kk])
            if ii == 0:
                # BUGFIX: the angstrom sign here was mojibake ("Ã…").
                axs[jj, ii].set_ylabel("Protein Height (Å)")
            # Only the bottom row gets an x-axis label.
            if (kk == 8) or (kk == 9):
                axs[jj, ii].set_xlabel("Time (μs)")
            print(np.mean(height[kk, 4001:]))
            kk += 1
    plt.tight_layout()
    plt.show()
6.2) Collect Protein Center of Mass Data¶
#trajectories = get_trajectories()
#individual_height_results = analyze_trajectories(individual_protein_height,
# trajectories)
6.3) Save Results¶
#for key in individual_height_results:
# data = {}
# for ii in range(0, 10):
# a2_num = str(ii)
# data[a2_num] = individual_height_results[key][:, ii]
# data = pd.DataFrame.from_dict(data)
# data.to_csv("analysis/individual_heights/" + key + ".csv", index = False)
6.4) Read Saved Results¶
# Re-load the saved per-protein height series as a (10, n_frames) array
# for each trajectory (one CSV column per protein).
prefix = "analysis/individual_heights/"
test_height_results = {}
for run in range(1, 6):
    name = "trajectory" + str(run)
    table = pd.read_csv(prefix + name + ".csv")
    columns = [list(table.iloc[:, col]) for col in range(10)]
    test_height_results[name] = np.array(columns)
6.5) Plot Results¶
present_analysis(present_individual_height, test_height_results)
The following plots correcpond to: trajectory1 16.36141268935273 13.263999098511714 14.673101744303988 16.024557948998932 24.104433819506582 17.929533177219565 16.89226795355231 20.019993026396328 13.122271222091253 41.85036369555732
The following plots correcpond to: trajectory2 20.81562603128449 16.02955003269995 13.917635309766927 14.040614098694506 22.29378603085873 15.3228433766458 40.09892490280148 20.339787894553815 23.33543307070982 23.31388812162696
The following plots correcpond to: trajectory3 18.754431424241652 12.46127924252203 12.881562900738954 16.383112860711385 19.607248381589514 25.583151491715647 12.885481718788904 15.04967971842962 13.740588216043191 14.287784206267867
The following plots correcpond to: trajectory4 44.322678684677456 12.007456283837552 13.204226866701088 20.06152663035018 14.41624750768596 14.55562913187548 19.619597389785287 12.757587773977136 22.867351389740207 20.686046604104686
The following plots correcpond to: trajectory5 18.503970654599453 12.081922713876983 19.091195351463504 20.767847493631717 18.94753524650009 14.97269797284841 12.528553157370691 22.00702322102428 14.67968328995768 17.588718550617287
7) A2-A2 Contact Analysis¶
7.1) Necessary Functions¶
7.1.1) Convert from 3 Letter to 1 Letter and Vice Versa¶
convert_aa = np.frompyfunc(mda.lib.util.convert_aa_code, 1, 1)
7.1.2) Count Contacts between A2¶
# Collect residue labels for every inter-monomer A2-A2 atom contact
# (pairs within 6.0 of each other) over frames 14000+ of the trajectory.
def protein_protein_contacts(u):
    # Create selections for groups that may be used for analysis
    selection = "(not resname POPC) and " + \
                "(not resname POPS) and " + \
                "(not resname POP2) and " + \
                "(not resname CHOL)"
    protein = u.select_atoms(selection)
    # Create selections for groups that may be used for analysis
    selection = "resname POPC or resname POPS or " + \
                "resname POP2 or resname CHOL"
    bilayer = u.select_atoms(selection)
    # Build a per-atom lookup of "<one-letter code><residue number>"
    # labels from the first monomer (the +30 offset remaps the
    # topology's resnums), then repeat it for every monomer.
    first_a2 = np.min(protein.molnums)
    residue_lookup = protein.atoms[protein.molnums == first_a2].resnums - \
                     first_a2 + 30
    residue_lookup = convert_aa(protein.atoms[protein.molnums == first_a2].resnames) + \
                     residue_lookup.astype(str)
    residue_lookup = list(residue_lookup)
    residue_lookup *= len(np.unique(protein.molnums))
    residue_lookup = np.array(residue_lookup)
    # Used to collect all of the information about each
    # contact
    contacts = np.array([])
    # Progress tracker
    ii = 0
    for ts in u.trajectory[14000:]:
        # Calculate distances
        if ii % 100 == 0:
            print(int(((ii / 100)) * 10), "% ", sep = '', end = '')
        # All protein-protein atom pairs within 6.0 (includes
        # self/intra-monomer pairs, filtered below).
        search_results = mda.lib.distances.capped_distance(
            protein.atoms.positions,
            protein.atoms.positions,
            6.)
        contact_1_sele = list(search_results[0][:, 0])
        contact_2_sele = list(search_results[0][:, 1])
        distances = list(search_results[1])
        # ID and replace contacts not from the same monomer
        # NOTE(review): this np.unique call computes counts and discards
        # them — it has no effect.
        np.unique((np.array(contact_1_sele) - np.array(contact_2_sele)) > 678, return_counts = True)
        # Keep only pairs whose atom indices differ by more than 678
        # (presumably the atoms-per-monomer count — TODO confirm),
        # i.e. inter-monomer contacts in one index order.
        # NOTE(review): multiplying by the boolean mask and then dropping
        # zeros would also drop a genuine contact involving atom index 0,
        # and condition1/condition2 can disagree in length — verify this
        # filtering is intended.
        mask = (np.array(contact_1_sele) - np.array(contact_2_sele)) > 678
        condition1 = contact_1_sele * mask != 0
        condition2 = contact_2_sele * mask != 0
        contact_1_sele = np.extract(condition1, contact_1_sele * mask)
        contact_2_sele = np.extract(condition2, contact_2_sele * mask)
        if len(distances) > 0:
            contacts = np.concatenate([contacts, residue_lookup[contact_1_sele],
                                       residue_lookup[contact_2_sele]])
        ii +=1
    print('')
    return(contacts)
7.1.3) Present a Contact Table and Generate a PDB with Percentages¶
# Pool the A2-A2 contact labels from all trajectories, print the 20
# most-contacted residues, and write a copy of the coarse-grained PDB
# whose "0.00" b-factor field is replaced with each residue's contact
# percentage (for coloring in a molecular viewer).
def present_a2_a2_contacts(a2_a2_contact_results):
    # Combine all results and pool contact counts
    all_a2_a2_contacts = np.concatenate([a2_a2_contact_results[key] for key in a2_a2_contact_results])
    contacts_counts = np.unique(all_a2_a2_contacts, return_counts = True)
    contacts = contacts_counts[0]
    counts = contacts_counts[1]
    # Collect information for making a table and pdb file
    contact_info = {}
    # Percentage of all pooled contacts attributable to each residue.
    contact_info["Percentage"] = (counts / np.sum(counts)) * 100.
    contact_info["Percentage"] = np.round(contact_info["Percentage"], 2).astype(str)
    # Labels look like "Y238": strip letters to isolate the residue
    # number, strip digits to isolate the one-letter code, then convert
    # between one- and three-letter codes.
    contact_info["Resnum"] = np.array([ii.strip("ABCDEFGHIJKLMNOPQRSTUVWXYZ") for ii in contacts])
    contact_info["Resname"] = convert_aa(np.array([ii.strip("0123456789") for ii in contacts]))
    contact_info["One Letter"] = convert_aa(contact_info["Resname"])
    # Generate the table using pandas and present the top 20 entries
    residue = contact_info["One Letter"] + \
              (contact_info["Resnum"].astype(int) + 1).astype(str)
    percentage = contact_info["Percentage"].astype(float)
    contact_table = pd.DataFrame.from_dict({"Residue" : residue,
                                            "Percentage" : percentage})
    contact_table = contact_table.sort_values("Percentage",
                                              ascending = False,
                                              ignore_index = True)
    print(contact_table.iloc[:20])
    # Generate a pdb file with the b-factors replaced by the % of A2-A2
    # contacts attributable to each residue
    with open("analysis/contact/a2_coarse.pdb", 'r') as f:
        contents = f.readlines()
    with open("analysis/contact/protein_contacts/contact.pdb", 'w') as f:
        for line in contents:
            for ii in range(len(contact_info["Resname"])):
                # Match the fixed-column PDB line for this residue up to
                # its "0.00" b-factor field, using '.' wildcards for the
                # intervening columns.
                pattern = r'(^.................'
                pattern = pattern + contact_info["Resname"][ii]
                pattern = pattern + ((6 - len(contact_info["Resnum"][ii])) * '.')
                pattern = pattern + contact_info["Resnum"][ii]
                pattern = pattern + ('.' * 36) + ')' + '0.00' + '(.*)'
                # BUGFIX: the second backreference was written in a
                # non-raw string ('\g<2>'), an invalid escape sequence
                # that only worked via Python's deprecated fallback.
                replacement = r'\g<1>' + contact_info["Percentage"][ii] + r'\g<2>'
                line = re.sub(pattern, replacement, line)
            f.writelines(line)
7.2) Perform Calculations¶
# Build the Universes and count A2-A2 contacts for each trajectory.
# Slow: iterates frames 14000+ of all five runs.
trajectories = get_trajectories()
a2_a2_contact_results = analyze_trajectories(protein_protein_contacts,
                                             trajectories)
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
7.3) Present Table and Generate PDB¶
present_a2_a2_contacts(a2_a2_contact_results)
Residue Percentage 0 Y238 3.57 1 K206 2.21 2 P237 2.09 3 Y317 2.01 4 K204 1.95 5 K80 1.88 6 K246 1.84 7 K281 1.83 8 R78 1.79 9 R77 1.61 10 K49 1.53 11 F33 1.45 12 K324 1.38 13 K313 1.37 14 Y311 1.36 15 K47 1.30 16 T79 1.28 17 F73 1.23 18 K249 1.21 19 Q321 1.18
7.4) Visualize Results¶
Open the resulting PDB file and run PyMOL's spectrum b command to color the system based on the fraction of contacts attributable to each residue. To use the viridis color palette instead of the default one, use the following sequence of commands.
set_color color1, [68, 1, 84]
set_color color2, [49, 104, 142]
set_color color3, [53, 183, 121]
set_color color4, [253, 231, 37]
spectrum b, color1 color2 color3 color4
8) A2-Bilayer Contact Analysis¶
8.1) Necessary Functions¶
8.1.1) Count Contacts Between A2 and Membrane¶
def protein_membrane_contacts(u):
    """Count A2 (protein) contacts with bilayer lipids.

    A contact is any protein/lipid atom pair within 6 Å. Only frames
    from index 4000 onward are analyzed. Returns a 1-D numpy array of
    residue labels (e.g. "K119"), one entry per observed contact,
    concatenated over all analyzed frames.
    """
    # Protein = everything that is not one of the four lipid species.
    selection = "(not resname POPC) and " + \
                "(not resname POPS) and " + \
                "(not resname POP2) and " + \
                "(not resname CHOL)"
    protein = u.select_atoms(selection)
    # Bilayer = the four lipid species.
    selection = "resname POPC or resname POPS or " + \
                "resname POP2 or resname CHOL"
    bilayer = u.select_atoms(selection)
    # Build a per-atom residue label lookup ("<one-letter code><resnum>").
    # The universe contains wrong residue numbers for each A2, so the
    # numbering is rebuilt from the first A2 molecule (offset +30).
    first_a2 = np.min(protein.molnums)
    residue_lookup = protein.atoms[protein.molnums == first_a2].resnums - \
                     first_a2 + 30
    residue_lookup = convert_aa(protein.atoms[protein.molnums == first_a2].resnames) + \
                     residue_lookup.astype(str)
    residue_lookup = list(residue_lookup)
    # Replicate the labels for every A2 copy in the system.
    residue_lookup *= len(np.unique(protein.molnums))
    residue_lookup = np.array(residue_lookup)
    # One label per observed contact, accumulated across frames.
    # (Fix: the original also accumulated the contacted lipid resnames
    # into a second list that was never used or returned — removed.)
    protein_contacts = []
    ii = 0
    for ts in u.trajectory[4000:]:
        # ~11000 analyzed frames, so 1100 frames per 10 % progress step.
        if ii % 1100 == 0:
            print(int(((ii / 1100)) * 10), "% ", sep = '', end = '')
        search_results = mda.lib.distances.capped_distance(protein.positions,
                                                           bilayer.positions, 6.0)
        protein_sele = list(search_results[0][:, 0])
        distances = list(search_results[1])
        if len(distances) > 0:
            protein_contacts.append(residue_lookup[protein_sele])
        ii += 1
    print('')
    protein_contacts = np.concatenate(protein_contacts)
    return(protein_contacts)
8.1.2) Present A2-Membrane Contact Results¶
def present_a2_membrane_contacts(a2_mem_contact_results):
    """Tabulate pooled A2-membrane contacts and write a colored PDB.

    Prints the 20 residues with the highest share of A2-lipid contacts,
    then rewrites analysis/contact/a2_coarse.pdb with each residue's
    contact percentage substituted for its 0.00 b-factor, saving the
    result as analysis/contact/membrane_contacts/contact.pdb.
    """
    # Combine all trajectories' contact labels and pool the counts.
    # Fix: iterate the function argument — the original iterated the
    # a2_a2_contact_results global, which only worked by coincidence
    # because both result dicts share the same trajectory keys.
    all_a2_mem_contacts = np.concatenate([a2_mem_contact_results[key] for key in a2_mem_contact_results])
    contacts_counts = np.unique(all_a2_mem_contacts, return_counts = True)
    contacts = contacts_counts[0]
    counts = contacts_counts[1]
    # Collect information for making a table and pdb file.
    contact_info = {}
    contact_info["Percentage"] = (counts / np.sum(counts)) * 100.
    contact_info["Percentage"] = np.round(contact_info["Percentage"], 2).astype(str)
    # Labels look like "K119": strip letters for the number and digits
    # for the one-letter code (expanded to three letters by convert_aa).
    contact_info["Resnum"] = np.array([ii.strip("ABCDEFGHIJKLMNOPQRSTUVWXYZ") for ii in contacts])
    contact_info["Resname"] = convert_aa(np.array([ii.strip("0123456789") for ii in contacts]))
    contact_info["One Letter"] = convert_aa(contact_info["Resname"])
    # Generate the table using pandas and present the top 20 entries.
    residue = contact_info["One Letter"] + \
              (contact_info["Resnum"].astype(int) + 1).astype(str)
    percentage = contact_info["Percentage"].astype(float)
    contact_table = pd.DataFrame.from_dict({"Residue" : residue,
                                            "Percentage" : percentage})
    contact_table = contact_table.sort_values("Percentage",
                                              ascending = False,
                                              ignore_index = True)
    print(contact_table.iloc[:20])
    # Generate a pdb file with the b-factors replaced by the % of
    # A2-membrane contacts attributable to each residue.
    with open("analysis/contact/a2_coarse.pdb", 'r') as f:
        contents = f.readlines()
    with open("analysis/contact/membrane_contacts/contact.pdb", 'w') as f:
        for line in contents:
            for ii in range(len(contact_info["Resname"])):
                # Build a regex matching the fixed PDB columns for this
                # resname/resnum and substitute the percentage for the
                # 0.00 b-factor field.
                pattern = r'(^.................'
                pattern = pattern + contact_info["Resname"][ii]
                pattern = pattern + ((6 - len(contact_info["Resnum"][ii])) * '.')
                pattern = pattern + contact_info["Resnum"][ii]
                pattern = pattern + ('.' * 36) + ')' + '0.00' + '(.*)'
                replacement = r'\g<1>' + contact_info["Percentage"][ii] + '\g<2>'
                line = re.sub(pattern, replacement, line)
            f.writelines(line)
8.2) Perform Analysis¶
# Run the A2-membrane contact analysis (8.1.1) over all five trajectories.
trajectories = get_trajectories()
a2_mem_contact_results = analyze_trajectories(protein_membrane_contacts,
                                              trajectories)
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
8.3) Present Table and Generate PDB¶
present_a2_membrane_contacts(a2_mem_contact_results)
Residue Percentage 0 K119 3.49 1 K206 3.45 2 K281 3.30 3 K49 3.24 4 K249 3.07 5 L121 2.95 6 K88 2.38 7 K246 2.35 8 Y151 2.34 9 K152 2.20 10 H94 2.15 11 R284 2.13 12 K324 1.98 13 S89 1.93 14 S92 1.88 15 K47 1.87 16 S161 1.79 17 K81 1.78 18 R245 1.78 19 K157 1.72 Residue K119K206K281K49K249L121K88K246Y151K152H94R284K... Percentage 47.78 dtype: object
8.4) Visualize Results¶
Open the resulting PDB file and run the spectrum b command to color the system based on the fraction of contacts.
9) Track Contacts Over Time¶
Plot fraction of contacts and total contacts for individual A2 with POPC, POPS, POP2, and CHOL over time.
9.1) Define Functions¶
9.1.1) Convert 1 to 3 Letter AA Code and Vice Versa¶
convert_aa = np.frompyfunc(mda.lib.util.convert_aa_code, 1, 1)
# --- Scratch cell: spot-check PIP2 contacts of one A2 at frame 333 ---
u = mda.Universe('trajectory/image1.tpr', 'trajectory/reimaged1.xtc')
u.trajectory[333]
# Create selections for groups that may be used for analysis
selection = "(not resname POPC) and " + \
            "(not resname POPS) and " + \
            "(not resname POP2) and " + \
            "(not resname CHOL)"
protein = u.select_atoms(selection)
# Create selections for groups that may be used for analysis
selection = "resname POPC or resname POPS or " + \
            "resname POP2 or resname CHOL"
# Deliberately overridden: restrict the "bilayer" group to PIP2 only
# for this check.
selection = "resname POP2"
bilayer = u.select_atoms(selection)
protein_molnums = list(set(protein.molnums))
protein_molnums.sort()
# Second-to-last molnum — presumably A2 copy "I"; TODO confirm.
I_molnum = protein_molnums[-2]
I_molnum
a2_sele = protein.molnums == I_molnum
# All protein/PIP2 atom pairs within 6 Å for this A2.
search_results = mda.lib.distances.capped_distance(
                     protein.atoms[a2_sele].positions,
                     bilayer.positions,
                     6.)
lipid_sele = list(search_results[0][:, 1])
lipid_num = bilayer.atoms.resnums[lipid_sele]
len(lipid_num)
#len(set(u.select_atoms("resname POP2").resnums))
35
# --- Scratch cell (incomplete) ---
u = mda.Universe('trajectory/image1.tpr', 'trajectory/reimaged1.xtc')
selection = "(not resname POPC) and " + \
            "(not resname POPS) and " + \
            "(not resname POP2) and " + \
            "(not resname CHOL)"
protein = u.select_atoms(selection)
# Create selections for groups that may be used for analysis
selection = "resname POPC or resname POPS or " + \
            "resname POP2 or resname CHOL"
bilayer = u.select_atoms(selection)
# NOTE(review): the assignment below was left unfinished (syntax error).
# It presumably was meant to mirror the previous cell's
# `I_molnum = sorted(set(protein.molnums))[-2]` — confirm or delete.
I_molnum =
<Universe with 58660 atoms>
9.1.2) Calculate the A2-Membrane Contact Counts¶
def lipid_contacts(u):
    """Per-A2 contact counts (≤ 6 Å) with each lipid species, per frame.

    Returns {code: {"POPC": ..., "POPS": ..., "POP2": ..., "CHOL": ...,
    "Total": ...}} with integer codes 0-9 for the ten A2 copies (in
    sorted molnum order) and float64 arrays of per-frame contact counts.
    """
    lipid_types = ["POPC", "POPS", "POP2", "CHOL"]
    # Protein = everything that is not one of the four lipid species.
    selection = "(not resname POPC) and " + \
                "(not resname POPS) and " + \
                "(not resname POP2) and " + \
                "(not resname CHOL)"
    protein = u.select_atoms(selection)
    # Bilayer = the four lipid species.
    selection = "resname POPC or resname POPS or " + \
                "resname POP2 or resname CHOL"
    bilayer = u.select_atoms(selection)
    # Map each A2 molnum to an integer code 0-9 in sorted order.
    # (Fix: the original first built a letter-code list that was dead
    # code — it was immediately overwritten by the numeric list.)
    protein_molnums = sorted(set(protein.molnums))
    molnum_to_code = dict(zip(protein_molnums, range(10)))
    # Aggregate information for each A2.
    aggregated_contact_info = {}
    for molnum, code in molnum_to_code.items():
        a2_sele = protein.molnums == molnum
        a2_contacts = {lip : [] for lip in lipid_types}
        ii = 0
        for ts in u.trajectory:
            if ii % track_divisor == 0:
                print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
            # Count this A2's lipid neighbors within the cutoff.
            search_results = mda.lib.distances.capped_distance(
                                 protein.atoms[a2_sele].positions,
                                 bilayer.positions,
                                 6.)
            lipid_sele = list(search_results[0][:, 1])
            lipid_name = bilayer.atoms.resnames[lipid_sele]
            for lip in lipid_types:
                a2_contacts[lip].append(np.count_nonzero(lipid_name == lip))
            ii += 1
        for lip in lipid_types:
            a2_contacts[lip] = np.float64(np.array(a2_contacts[lip]))
        # Total is already float64; the original converted it twice.
        a2_contacts["Total"] = a2_contacts["POPC"] + a2_contacts["POPS"] + \
                               a2_contacts["POP2"] + a2_contacts["CHOL"]
        aggregated_contact_info[code] = a2_contacts
        print('')
    return(aggregated_contact_info)
9.1.3) Present the Contact Counts for the First 6 us of the Trajectory¶
def present_early_mem_contacts(aggregated_contact_info):
    """Plot per-A2 lipid contact counts for the first 6000 frames (6 µs).

    One panel per A2 copy (A-J) on a 5x2 grid; each panel shows POPC,
    POPS, POP2 and CHOL contact counts versus time with a shared legend.
    """
    np.seterr(all = "ignore")
    panel_labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
    fig, axs = plt.subplots(5, 2, figsize=(25, 30), sharey = True)
    t = time[:6000] / (10 ** 6)
    for idx, ax in enumerate(axs.flat):
        counts = aggregated_contact_info[idx]
        ax.plot(t, np.nan_to_num(counts["POPC"])[:6000], label = "POC")
        ax.plot(t, np.nan_to_num(counts["POPS"])[:6000], label = "POS")
        ax.plot(t, np.nan_to_num(counts["POP2"])[:6000], label = "PIP2")
        ax.plot(t, np.nan_to_num(counts["CHOL"])[:6000], label = "CHL")
        ax.set_title(panel_labels[idx])
        ax.set_ylim([-10, 325])
        # Label only the left column and the bottom row.
        if idx % 2 == 0:
            ax.set_ylabel("Contact Count")
        if idx >= 8:
            ax.set_xlabel("Time (μs)")
    handles, labels = axs[0, 0].get_legend_handles_labels()
    fig.legend(handles, labels, loc = "outside center right",
               bbox_to_anchor = (1.08, 0.5))
    plt.tight_layout()
    plt.show()
9.1.4) Present Contact Count for the Full Trajectory¶
def present_late_mem_contacts(aggregated_contact_info):
    """Plot per-A2 lipid contact counts over the full trajectory.

    Same 5x2 per-A2 layout as present_early_mem_contacts, but every
    frame is plotted instead of only the first 6000.
    """
    np.seterr(all = "ignore")
    panel_labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
    fig, axs = plt.subplots(5, 2, figsize=(25, 30), sharey = True)
    t = time / (10 ** 6)
    for idx, ax in enumerate(axs.flat):
        counts = aggregated_contact_info[idx]
        ax.plot(t, np.nan_to_num(counts["POPC"]), label = "POC")
        ax.plot(t, np.nan_to_num(counts["POPS"]), label = "POS")
        ax.plot(t, np.nan_to_num(counts["POP2"]), label = "PIP2")
        ax.plot(t, np.nan_to_num(counts["CHOL"]), label = "CHL")
        ax.set_title(panel_labels[idx])
        ax.set_ylim([-10, 325])
        # Label only the left column and the bottom row.
        if idx % 2 == 0:
            ax.set_ylabel("Contact Count")
        if idx >= 8:
            ax.set_xlabel("Time (μs)")
    handles, labels = axs[0, 0].get_legend_handles_labels()
    fig.legend(handles, labels, loc = "outside center right",
               bbox_to_anchor = (1.08, 0.5))
    plt.tight_layout()
    plt.show()
9.1.5) Present the Percentage of Contacts¶
def present_fraction_mem_contacts(aggregated_contact_info):
    """Plot the percentage of each A2's lipid contacts contributed by
    POPC, POPS, POP2 (PIP2) and CHOL over the full trajectory.

    One panel per A2 (A-J) on a 5x2 grid. Frames with zero total
    contacts produce NaN percentages, which matplotlib leaves as gaps.
    """
    title = {0 : 'A', 1 : 'B', 2 : 'C', 3 : 'D', 4 : 'E',
             5 : 'F', 6 : 'G', 7 : 'H', 8 : 'I', 9 : 'J'}
    fig, axs = plt.subplots(5, 2, figsize=(25, 30), sharey = True)
    kk = 0
    for jj in range(0, 5):
        for ii in range(0, 2):
            num_popc = np.nan_to_num(aggregated_contact_info[kk]["POPC"])
            num_pops = np.nan_to_num(aggregated_contact_info[kk]["POPS"])
            num_pop2 = np.nan_to_num(aggregated_contact_info[kk]["POP2"])
            num_chol = np.nan_to_num(aggregated_contact_info[kk]["CHOL"])
            total_contacts = num_popc + num_pops + num_pop2 + num_chol
            # Fix: suppress 0/0 warnings locally with errstate instead
            # of permanently mutating numpy's global error state via
            # np.seterr(all = "ignore").
            with np.errstate(all = "ignore"):
                frac_popc = (num_popc / total_contacts) * 100.
                frac_pops = (num_pops / total_contacts) * 100.
                frac_pop2 = (num_pop2 / total_contacts) * 100.
                frac_chol = (num_chol / total_contacts) * 100.
            axs[jj, ii].plot(time / (10 ** 6), frac_popc, label = "POC")
            axs[jj, ii].plot(time / (10 ** 6), frac_pops, label = "POS")
            axs[jj, ii].plot(time / (10 ** 6), frac_pop2, label = "PIP2")
            axs[jj, ii].plot(time / (10 ** 6), frac_chol, label = "CHL")
            axs[jj, ii].set_title(title[kk])
            # Reference lines at 50 / 75 / 90 %.
            axs[jj, ii].axhline(50., linestyle = "dashed", color = "black")
            axs[jj, ii].axhline(75., linestyle = "dashed", color = "black")
            axs[jj, ii].axhline(90., linestyle = "dashed", color = "black")
            if ii == 0:
                axs[jj, ii].set_ylabel("Contact Percentage")
            if (kk == 8) or (kk == 9):
                axs[jj, ii].set_xlabel("Time (μs)")
            kk += 1
    handles, labels = axs[0, 0].get_legend_handles_labels()
    fig.legend(handles, labels, loc = "outside center right",
               bbox_to_anchor = (1.08, 0.5))
    plt.tight_layout()
    plt.show()
9.2) Perform Analysis¶
#trajectories = get_trajectories()
#lipid_contact_results = analyze_trajectories(lipid_contacts,
# trajectories)
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 
50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
9.3) Save Results¶
#for key in lipid_contact_results:
# data = {}
# for ii in range(0, 10):
# data["POPC" + str(ii)] = lipid_contact_results[key][ii]["POPC"]
# data["POPS" + str(ii)] = lipid_contact_results[key][ii]["POPS"]
# data["POP2" + str(ii)] = lipid_contact_results[key][ii]["POP2"]
# data["CHOL" + str(ii)] = lipid_contact_results[key][ii]["CHOL"]
# data["Total" + str(ii)] = lipid_contact_results[key][ii]["Total"]
# data = pd.DataFrame.from_dict(data)
# data.to_csv("analysis/contact_count/" + key + ".csv", index = False)
9.4) Read Results¶
# Reload the per-A2 lipid contact counts saved in section 9.3.
prefix = "analysis/contact_count/"
lipid_contact_results = {}
for ii in range(1, 6):
    traj_name = "trajectory" + str(ii)
    # Fix: read each trajectory's CSV once — the original re-read the
    # same file on every pass of the inner (per-A2) loop.
    data = pd.read_csv(prefix + traj_name + ".csv")
    lipid_contact_results[traj_name] = {}
    for jj in range(0, 10):
        lipid_contact_results[traj_name][jj] = {}
        # Columns are named "<field><a2 index>" by the save cell.
        for field in ["POPC", "POPS", "POP2", "CHOL", "Total"]:
            lipid_contact_results[traj_name][jj][field] = np.array(data[field + str(jj)])
9.5) Present Results¶
9.5.1) Plot First 6us of Contacts¶
present_analysis(present_early_mem_contacts, lipid_contact_results)
The following plots correcpond to: trajectory1
The following plots correcpond to: trajectory2
The following plots correcpond to: trajectory3
The following plots correcpond to: trajectory4
The following plots correcpond to: trajectory5
9.5.2) Plot All Contacts¶
present_analysis(present_late_mem_contacts, lipid_contact_results)
The following plots correcpond to: trajectory1
The following plots correcpond to: trajectory2
The following plots correcpond to: trajectory3
The following plots correcpond to: trajectory4
The following plots correcpond to: trajectory5
9.5.3) Plot Percentage of Contacts¶
present_analysis(present_fraction_mem_contacts, lipid_contact_results)
The following plots correcpond to: trajectory1
The following plots correcpond to: trajectory2
The following plots correcpond to: trajectory3
The following plots correcpond to: trajectory4
The following plots correcpond to: trajectory5
10) Lipid Frechet Mean for Clustering¶
10.1) Necessary Functions¶
10.1.1) Calculate the Frechet Variance¶
def calculate_lipid_variance(u):
    """Per-frame Frechet variance of each lipid species' head groups on
    the upper leaflet, modeled as a sphere of radius 122 Å.

    The Frechet mean is found by constrained minimization of the mean
    squared great-circle (arc-length) distance over the sphere; the
    attained minimum is the variance. Returns {lipid: [variance/frame]}.
    """
    def avg_pairwise_distance_squared(x):
        # Mean squared arc length between candidate point x and every
        # head-group COM in the current `frame` (closure variable).
        angles = np.sum(frame * x, axis = 1)
        angles = angles / \
                 (np.linalg.norm(x) * \
                  np.linalg.norm(frame, axis = 1))
        angles = np.arccos(angles)
        angles = np.nan_to_num(angles)
        arclengths = angles * average_upper_radius
        dist_squared = arclengths * arclengths
        return(np.mean(dist_squared))
    average_upper_radius = 122.
    # Head-group beads for each lipid species.
    selection = "(resname POPC and name PO4 NC3) or " + \
                "(resname POPS and name PO4 CNO) or " + \
                "(resname POP2 and name P1 P2 C1 C2 C3 PO4) or " + \
                "(resname CHOL)"
    head_group = u.select_atoms(selection)
    bilayer = u.select_atoms("resname POPC or resname POPS or " + \
                             "resname POP2 or resname CHOL")
    # Per-lipid head-group COM trajectories, centered on the bilayer COM.
    lipid_head_com = {}
    for lip in ["POPC", "POPS", "POP2", "CHOL"]:
        lipid_sele = head_group.select_atoms("resname " + lip)
        # (Removed a dead `lipid_head_com[lip] = np.empty` line that was
        # immediately overwritten.)
        lipid_head_com[lip] = np.empty((int(u.trajectory.n_frames) + 1,
                                        lipid_sele.n_residues,
                                        3))
        ii = 0
        for ts in u.trajectory:
            # Progress indicator (hard-coded 1500-frame stride).
            if ii % 1500 == 0:
                print(int(((ii / track_divisor) * 10)), "% ", sep = '', end = '')
            lipid_sele.positions = lipid_sele.positions - bilayer.center_of_mass()
            lipid_head_com[lip][ii, :] = lipid_sele.center_of_mass(compound = 'residues')
            ii += 1
        print(' ')
    for lip in ["POPC", "POPS", "POP2", "CHOL"]:
        head_com = lipid_head_com[lip]
        # Distance of each lipid from the vesicle center in frame 0.
        dist_from_center = np.sqrt(np.sum(head_com[0, :, :] * head_com[0, :, :], axis = 1))
        # Use the average magnitude to separate the two leaflets.
        upper_leaf_min = np.mean(dist_from_center)
        upper_selection = dist_from_center > upper_leaf_min
        upper_selection = [ii for ii in range(len(upper_selection)) if upper_selection[ii]]
        # Keep only the upper-leaflet lipids for the variance calculation.
        lipid_head_com[lip] = head_com[:, upper_selection, :]
    variance_results = {}
    for lip in ["POPC", "POPS", "CHOL", "POP2"]:
        variance_results[lip] = []
        ii = 0
        for ts in u.trajectory:
            if ii % 1500 == 0:
                print(int(((ii / track_divisor)) * 10), "% ",
                      sep = '', end = '')
            # Project this frame's COMs onto the reference sphere.
            frame = lipid_head_com[lip][ii, :, :]
            frame = frame / np.linalg.norm(frame, axis = 1)[:, None]
            frame = frame * average_upper_radius
            # Constrain the candidate Frechet mean to the sphere surface.
            cons = ({'type': 'eq', 'fun': lambda x: np.sqrt((x[0] ** 2) + (x[1] ** 2) + (x[2] ** 2)) - average_upper_radius})
            optimization_results = []
            aur = average_upper_radius
            # Six antipodal starting points to avoid local minima.
            guesses = [[0, 0, aur], [0, 0, -aur],
                       [0, aur, 0], [0, -aur, 0],
                       [aur, 0, 0], [-aur, 0, 0]]
            for jj in range(len(guesses)):
                results = scipy.optimize.minimize(avg_pairwise_distance_squared, guesses[jj], constraints = cons)
                optimization_results.append(np.append(results['x'], results["fun"]))
            optimization_results = np.array(optimization_results)
            # Fix: assign the sorted array — the original discarded the
            # argsort result, so row 0 was the first guess's outcome,
            # not the smallest attained objective.
            optimization_results = optimization_results[optimization_results[:, 3].argsort()]
            variance = optimization_results[0, 3]
            variance_results[lip].append(variance)
            ii += 1
        print(' ')
    return(variance_results)
10.1.2) Present the Frechet Variance Results¶
def present_lipid_variance(variance_results):
    """Plot each lipid species' Frechet variance versus time (µs)."""
    fig, ax = plt.subplots(figsize = (14, 7))
    # Drop the top and right spines for a cleaner frame.
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)
    # Final figure dimensions (overrides the figsize above).
    fig.set_size_inches(15, 4)
    ax.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
    # One line per lipid species, time converted from ps to µs.
    t = time / 10 ** 6
    for lip, label in (("POPC", "POC"), ("POPS", "POS"),
                       ("CHOL", "CHL"), ("POP2", "PIP2")):
        ax.plot(t, variance_results[lip], label = label)
    ax.set_ylim([20000, 47500])
    ax.set_xlabel("Time (µs)")
    ax.set_ylabel("Variance (Ų)")
    ax.legend()
    plt.show()
10.1.3) Combined Lipid Variance Results¶
def present_combined_lipid_variance(lipid_variance_results):
    """Plot the lipid Frechet variances of all five trajectories on one
    figure (two panels per row, fifth panel centered on the last row)."""
    fig = plt.figure()
    fig.set_size_inches(27, 21)
    # A 4-column grid lets two half-width panels share each of the top
    # rows while the fifth panel is centered on row 2.
    spec = mpl.gridspec.GridSpec(ncols=4, nrows=3)
    slots = [(0, slice(0, 2)), (0, slice(2, 4)),
             (1, slice(0, 2)), (1, slice(2, 4)),
             (2, slice(1, 3))]
    t = time / 10 ** 6
    for idx, (row, cols) in enumerate(slots):
        ax = fig.add_subplot(spec[row, cols])
        lip_var = lipid_variance_results["trajectory" + str(idx + 1)]
        # Drop the top and right spines for a cleaner frame.
        for side in ("top", "right"):
            ax.spines[side].set_visible(False)
        for lip, label in (("POPC", "POC"), ("POPS", "POS"),
                           ("CHOL", "CHL"), ("POP2", "PIP2")):
            ax.plot(t, lip_var[lip], label = label)
        ax.set_ylim([20000, 47500])
        ax.set_xlabel("Time (µs)")
        ax.set_ylabel("Variance (Ų)")
        ax.legend()
    plt.tight_layout()
    plt.show()
10.2) Perform Analysis¶
#trajectories = get_trajectories()
#lipid_variance_results = analyze_trajectories(calculate_lipid_variance,
# trajectories)
10.3) Save Results¶
#for key in lipid_variance_results:
# data = {}
# data["POPC"] = lipid_variance_results[key]["POPC"]
# data["POPS"] = lipid_variance_results[key]["POPS"]
# data["POP2"] = lipid_variance_results[key]["POP2"]
# data["CHOL"] = lipid_variance_results[key]["CHOL"]
# data = pd.DataFrame.from_dict(data)
# data.to_csv("analysis/variance/lipids/" + key + ".csv",
# index = False)
10.4) Read Results¶
# Reload the per-trajectory lipid Frechet variances saved in section 10.3.
prefix = "analysis/variance/lipids/"
lipid_variance_results = {}
for ii in range(1, 6):
    traj_name = "trajectory" + str(ii)
    data = pd.read_csv(prefix + "trajectory" + str(ii) + ".csv")
    lipid_variance_results[traj_name] = {}
    # Positional columns assume the save order POPC, POPS, POP2, CHOL
    # used in section 10.3 — confirm if the save cell changes.
    lipid_variance_results[traj_name]["POPC"] = list(data.iloc[:, 0])
    lipid_variance_results[traj_name]["POPS"] = list(data.iloc[:, 1])
    lipid_variance_results[traj_name]["POP2"] = list(data.iloc[:, 2])
    lipid_variance_results[traj_name]["CHOL"] = list(data.iloc[:, 3])
10.5) Present Results¶
present_analysis(present_lipid_variance, lipid_variance_results)
The following plots correcpond to: trajectory1
The following plots correcpond to: trajectory2
The following plots correcpond to: trajectory3
The following plots correcpond to: trajectory4
The following plots correcpond to: trajectory5
present_combined_lipid_variance(lipid_variance_results)
11) Protein Frechet Mean for Clustering¶
11.1) Necessary Functions¶
11.1.1) Calculate the Frechet Variance¶
def calculate_protein_variance(u):
    """Per-frame Frechet variance of the A2 centers of mass on the
    vesicle surface (sphere of radius 122 Å).

    Same constrained-minimization approach as the lipid version in
    section 10.1.1. Returns a list with one variance per frame.
    """
    average_upper_radius = 122.
    bilayer = u.select_atoms("resname POPC or " + \
                             "resname POPS or " + \
                             "resname POP2 or " + \
                             "resname CHOL")
    # Protein = everything that is not one of the four lipid species.
    protein = u.select_atoms("(not resname POPC) and " + \
                             "(not resname POPS) and " + \
                             "(not resname POP2) and " + \
                             "(not resname CHOL)")
    # Per-frame COM of each A2 molecule, centered on the bilayer COM.
    # NOTE(review): the molecule count (10) and the +1 frame padding
    # are hard-coded — confirm against the system.
    protein_com = np.empty((int(u.trajectory.n_frames) + 1, 10, 3))
    ii = 0
    for ts in u.trajectory:
        # Progress indicator (hard-coded 1500-frame stride).
        if ii % 1500 == 0:
            print(int(((ii / track_divisor) * 10)), "% ", sep = '', end = '')
        protein.positions = protein.positions - bilayer.center_of_mass()
        protein_com[ii, :] = protein.center_of_mass(compound = 'molecules')
        ii += 1
    print(' ')
    def avg_pairwise_distance_squared(x):
        # Mean squared arc length between candidate point x and every
        # A2 COM in the current `frame` (closure variable).
        angles = np.sum(frame * x, axis = 1)
        angles = angles / \
                 (np.linalg.norm(x) * \
                  np.linalg.norm(frame, axis = 1))
        angles = np.arccos(angles)
        angles = np.nan_to_num(angles)
        arclengths = angles * average_upper_radius
        dist_squared = arclengths * arclengths
        return(np.mean(dist_squared))
    protein_variance_results = []
    ii = 0
    for ts in u.trajectory:
        if ii % 1500 == 0:
            print(int(((ii / track_divisor) * 10)), "% ", sep = '', end = '')
        frame = protein_com[ii, :, :]
        # Constrain the candidate Frechet mean to the sphere surface.
        cons = ({'type': 'eq', 'fun': lambda x: np.sqrt((x[0] ** 2) + (x[1] ** 2) + (x[2] ** 2)) - average_upper_radius})
        optimization_results = []
        aur = average_upper_radius
        # Six antipodal starting points to avoid local minima.
        guesses = [[0, 0, aur], [0, 0, -aur],
                   [0, aur, 0], [0, -aur, 0],
                   [aur, 0, 0], [-aur, 0, 0]]
        for jj in range(len(guesses)):
            results = scipy.optimize.minimize(avg_pairwise_distance_squared, guesses[jj], constraints = cons)
            optimization_results.append(np.append(results['x'], results["fun"]))
        optimization_results = np.array(optimization_results)
        # Fix: assign the sorted array — the original discarded the
        # argsort result, so row 0 was the first guess's outcome, not
        # the smallest attained objective.
        optimization_results = optimization_results[optimization_results[:, 3].argsort()]
        variance = optimization_results[0, 3]
        protein_variance_results.append(variance)
        ii += 1
    print(' ')
    return(protein_variance_results)
11.1.2) Present the Frechet Variance¶
def present_protein_variance(variance_results):
    """Plot the A2 Frechet variance versus time.

    NOTE(review): the x axis is the raw `time` array (treated as ps and
    divided by 1e6 elsewhere in this notebook) but is labeled "ns" —
    confirm the intended unit.
    """
    fig, ax = plt.subplots(figsize = (15, 4))
    # Drop the top and right spines for a cleaner frame.
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)
    ax.plot(time, variance_results)
    ax.set_xlabel("Time (ns)")
    ax.set_ylabel("Variance (Ų)")
    ax.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
    plt.show()
11.2) Perform Calculations¶
#trajectories = get_trajectories()
#protein_variance_results = analyze_trajectories(calculate_protein_variance,
# trajectories)
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
11.3) Save Results¶
#for key in protein_variance_results:
# data = {}
# data["A2"] = protein_variance_results[key]
# data = pd.DataFrame.from_dict(data)
# data.to_csv("analysis/variance/protein/" + key + ".csv",
# index = False)
11.4) Read Results¶
# Reload the per-trajectory A2 Frechet variances saved in section 11.3.
prefix = "analysis/variance/protein/"
protein_variance_results = {}
for ii in range(1, 6):
    traj_name = "trajectory" + str(ii)
    data = pd.read_csv(prefix + traj_name + ".csv")
    # Fix: removed a dead `= {}` assignment that was immediately
    # overwritten by the list below.
    protein_variance_results[traj_name] = list(data.iloc[:, 0])
11.5) Present Results¶
present_analysis(present_protein_variance, protein_variance_results)
The following plots correcpond to: trajectory1
The following plots correcpond to: trajectory2
The following plots correcpond to: trajectory3
The following plots correcpond to: trajectory4
The following plots correcpond to: trajectory5
12) Lipid-Protein Variance Relationship¶
12.1) Overlain Variance vs Time Plots¶
# For each trajectory: print the Pearson correlation between the A2 and
# PIP2 Frechet variances, then overlay the two time series on twin y axes.
for key in protein_variance_results:
    print("The following plots correspond to:", key)
    print(scipy.stats.pearsonr(protein_variance_results[key][:],
                               lipid_variance_results[key]["POP2"][:]))
    # Initialize plotting.
    fig, ax1 = plt.subplots(figsize = (7, 5))
    # Remove top and right spines from the figure.
    ax1.spines["top"].set_visible(False)
    ax1.spines["right"].set_visible(False)
    color = 'C4'
    ax1.set_xlabel('Time (µs)')
    ax1.set_ylabel('A2 Variance (Ų)')
    ax1.tick_params(axis='y')
    ax1.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
    plt1 = ax1.plot(time[:] / (10 ** 6), protein_variance_results[key][:],
                    color = color, label = "A2")
    ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis
    color = 'C3'
    ax2.set_ylabel('PIP2 Variance (Ų)')
    ax2.tick_params(axis = 'y')
    ax2.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
    plt2 = ax2.plot(time[:] / (10 ** 6), lipid_variance_results[key]["POP2"][:],
                    color = color, label = "PIP2")
    # Build a single legend covering the lines of both y axes.
    lns = plt1 + plt2
    labels = [l.get_label() for l in lns]
    ax2.legend(lns, labels)
    fig.tight_layout()  # otherwise the right y-label is slightly clipped
    plt.show()
The following plots correspond to: trajectory1 PearsonRResult(statistic=0.8098302446204506, pvalue=0.0)
The following plots correspond to: trajectory2 PearsonRResult(statistic=0.8910302251798473, pvalue=0.0)
The following plots correspond to: trajectory3 PearsonRResult(statistic=0.8000298892394614, pvalue=0.0)
The following plots correspond to: trajectory4 PearsonRResult(statistic=0.7429505201181323, pvalue=0.0)
The following plots correspond to: trajectory5 PearsonRResult(statistic=0.9361842846397979, pvalue=0.0)
# Combined 2+2+1 figure: the same twin-axis A2/PIP2 variance overlays as
# the previous cell, one panel per trajectory.
fig = plt.figure()
# Adjust figure size.
fig.set_size_inches(27, 21)
spec = mpl.gridspec.GridSpec(ncols=4, nrows=3)  # 4 cols: two panels per row, fifth centered
ax1 = fig.add_subplot(spec[0,0:2])
ax2 = fig.add_subplot(spec[0,2:4])
ax3 = fig.add_subplot(spec[1,0:2])
ax4 = fig.add_subplot(spec[1,2:4])
ax5 = fig.add_subplot(spec[2,1:3])
axes = [ax1, ax2, ax3, ax4, ax5]
for ii in range(0, 5):
    key = "trajectory" + str(ii + 1)
    # Twin axes are appended to `axes` as the loop runs, so the twin of
    # axes[ii] ends up at index ii + 5.
    kk = ii + 5
    # Remove top and right spines from the figure.
    #axes[ii].spines["top"].set_visible(False)
    #axes[ii].spines["right"].set_visible(False)
    color = 'C4'
    axes[ii].set_xlabel('Time (µs)')
    axes[ii].set_ylabel('A2 Variance (Ų)')
    axes[ii].tick_params(axis='y')
    axes[ii].ticklabel_format(style='sci', axis='y', scilimits=(0,0))
    plt1 = axes[ii].plot(time[:] / (10 ** 6), protein_variance_results[key][:],
                         color = color, label = "A2")
    axes.append(axes[ii].twinx())  # second axes sharing the same x-axis
    color = 'C3'
    axes[kk].set_ylabel('PIP2 Variance (Ų)')
    axes[kk].tick_params(axis = 'y')
    axes[kk].ticklabel_format(style='sci', axis='y', scilimits=(0,0))
    plt2 = axes[kk].plot(time[:] / (10 ** 6), lipid_variance_results[key]["POP2"][:],
                         color = color, label = "PIP2")
    # Build a single legend covering the lines of both y axes.
    lns = plt1 + plt2
    labels = [l.get_label() for l in lns]
    axes[kk].legend(lns, labels)
fig.tight_layout()  # otherwise the right y-label is slightly clipped
plt.show()
12.2) Lipid vs Protein Variance¶
# Scatter PIP2 variance against A2 variance for each trajectory, with a
# linear trend line; print the Pearson r and p-value for each fit.
for key in protein_variance_results:
    fig, ax = plt.subplots(figsize = (7, 5))
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.scatter(protein_variance_results[key],
               lipid_variance_results[key]["POP2"])
    ax.set_xlabel("A2 Variance (Ų)")
    ax.set_ylabel("PIP2 Variance (Ų)")
    # First-order polynomial fit for the trend line.
    z = np.polyfit(protein_variance_results[key],
                   lipid_variance_results[key]["POP2"],
                   1)
    p = np.poly1d(z)
    ax.plot(protein_variance_results[key],
            p(protein_variance_results[key]), ':',
            color = "orange")
    ax.ticklabel_format(style = "sci", axis = 'y',
                        scilimits = (0, 0))
    ax.ticklabel_format(style = "sci", axis = 'x',
                        scilimits = (0, 0))
    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(protein_variance_results[key],
                                                                         lipid_variance_results[key]["POP2"])
    print(r_value, p_value)
    plt.show()
0.8098302446204508 0.0
0.8910302251798478 0.0
0.800029889239461 0.0
0.7429505201181322 0.0
0.9361842846397981 0.0
#---------------------------------------------------------
# Combined 2+2+1 figure of the PIP2-vs-A2 variance scatters (one panel
# per trajectory), each with a linear trend line.
fig = plt.figure()
# Adjust figure size.
fig.set_size_inches(14, 21)
spec = mpl.gridspec.GridSpec(ncols=4, nrows=3)  # 4 cols: two panels per row, fifth centered
ax1 = fig.add_subplot(spec[0,0:2])
ax2 = fig.add_subplot(spec[0,2:4])
ax3 = fig.add_subplot(spec[1,0:2])
ax4 = fig.add_subplot(spec[1,2:4])
ax5 = fig.add_subplot(spec[2,1:3])
axes = [ax1, ax2, ax3, ax4, ax5]
for ii in range(0, 5):
    key = "trajectory" + str(ii + 1)
    axes[ii].spines["top"].set_visible(False)
    axes[ii].spines["right"].set_visible(False)
    axes[ii].scatter(protein_variance_results[key],
                     lipid_variance_results[key]["POP2"])
    axes[ii].set_xlabel("A2 Variance (Ų)")
    axes[ii].set_ylabel("PIP2 Variance (Ų)")
    # First-order polynomial fit for the trend line.
    z = np.polyfit(protein_variance_results[key],
                   lipid_variance_results[key]["POP2"],
                   1)
    p = np.poly1d(z)
    axes[ii].plot(protein_variance_results[key],
                  p(protein_variance_results[key]), ':',
                  color = "orange")
    axes[ii].ticklabel_format(style = "sci", axis = 'y',
                              scilimits = (0, 0))
    axes[ii].ticklabel_format(style = "sci", axis = 'x',
                              scilimits = (0, 0))
fig.tight_layout()  # otherwise the right y-label is slightly clipped
plt.show()
##------------------------------------------------
#for key in protein_variance_results:
# fig, ax = plt.subplots(figsize = (7, 5))
#
# ax.spines["top"].set_visible(False)
# ax.spines["right"].set_visible(False)
#
# ax.scatter(protein_variance_results[key],
# lipid_variance_results[key]["POP2"])
#
# ax.set_xlabel("A2 Variance (Ų)")
# ax.set_ylabel("POP2 Variance (Ų)")
#
# z = np.polyfit(protein_variance_results[key],
# lipid_variance_results[key]["POP2"],
# 1)
# p = np.poly1d(z)
#
# ax.plot(protein_variance_results[key],
# p(protein_variance_results[key]), ':',
# color = "orange")
#
# ax.ticklabel_format(style = "sci", axis = 'y',
# scilimits = (0, 0))
# ax.ticklabel_format(style = "sci", axis = 'x',
# scilimits = (0, 0))
#
# slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(protein_variance_results[key],
# lipid_variance_results[key]["POP2"])
#
# plt.show()
13) Radial Distribution for A2 Height from Vesicle Center¶
13.1) Necessary Functions¶
13.1.1) Universe Transformation to Switch an Atom's Coordinates with the System's COM¶
class replace_with_COM:
    """On-the-fly trajectory transformation that overwrites the position of
    a marker atom with the center of mass of the bilayer selection.

    The marker atom (chosen via `selection`, e.g. "resnum 1 and name C4A")
    then acts as a stand-in "COM particle" so that two-group analyses such
    as InterRDF can measure distances from the vesicle center.
    """
    def __init__(self, bilayer, selection):
        # AtomGroup whose center of mass is tracked every frame.
        self.bilayer = bilayer
        # The marker atom(s) whose coordinates get replaced each frame.
        self.com_atoms = bilayer.select_atoms(selection)
        # sanity check
        #print(self.com_atoms.positions.shape)
        #print(self.get_com().shape)
        #assert self.get_com().shape == self.com_atoms.positions.shape
    def get_com(self):
        # Mass-weighted center of the bilayer selection for the current frame.
        return self.bilayer.center_of_mass()
    def __call__(self, ts):
        # Invoked by MDAnalysis on every timestep: overwrite the marker
        # atom's position with the bilayer COM, then pass the frame on.
        self.com_atoms.positions = np.array([list(self.get_com())])
        return ts
13.1.2) Calculate Combined RDF for A2 and the Vesicle¶
def calculate_combined_rdf(ucom):
    """Compute two radial distribution functions measured from the vesicle
    center of mass: one for the phospholipids and one for the A2 protein.

    A replace_with_COM transformation is attached to the trajectory so the
    marker atom ("resnum 1 and name C4A") always sits at the bilayer COM;
    InterRDF then measures lipid/protein distances from that marker.

    Parameters
    ----------
    ucom : MDAnalysis.Universe for one trajectory.

    Returns
    -------
    list : [rdf_lipids, rdf_a2], the two completed InterRDF objects.
    """
    # Create selections for groups that may be used for analysis
    selection = "(resname POPC) or (resname POPS) or " + \
                "(resname POP2)"
    lipids = ucom.select_atoms(selection)
    selection = "(protein)"
    a2 = ucom.select_atoms(selection)
    bilayer = ucom.select_atoms("resname POPC or resname POPS or " + \
                                "resname POP2 or resname CHOL")
    selection = "resnum 1 and name C4A"
    ucom.trajectory.add_transformations(replace_with_COM(bilayer, selection))
    #------------------------------------------------------------------
    selection = "resnum 1 and name C4A"
    com_atoms = ucom.select_atoms(selection)
    # 1000 bins over 65-180 Å; frames before index 4000 are skipped as
    # equilibration.
    rdf_lipids = mda.analysis.rdf.InterRDF(com_atoms, lipids, nbins = 1000, range = (65., 180.), verbose = True)
    rdf_lipids.run(start = 4000)
    rdf_a2 = mda.analysis.rdf.InterRDF(com_atoms, a2, nbins = 1000, range = (65., 180.), verbose = True)
    rdf_a2.run(start = 4000)
    #------------------------------------------------------------------
    # Zero the first bin before returning.  NOTE(review): presumably this
    # removes an artifact at zero distance from the marker atom -- confirm.
    rdf_lipids.results["rdf"][0] = 0.0
    rdf_a2.results["rdf"][0] = 0.0
    return([rdf_lipids, rdf_a2])
13.1.3) Present the Combined RDF¶
def present_coupled_rdf(combined_rdf_results):
    """Plot the lipid and A2 RDFs on one axis, mark the leaflet and A2
    peaks with dashed guides, and annotate the A2-to-outer-leaflet
    distance in Å.

    Parameters
    ----------
    combined_rdf_results : [rdf_lipids, rdf_a2] pair of mappings, each
        exposing "bins" (radii) and "rdf" (g(r)) sequences.
    """
    rdf_lipids = combined_rdf_results[0]
    rdf_a2 = combined_rdf_results[1]
    # Peaks at least 20 bins wide: the two lipid peaks are the inner and
    # outer leaflets; the first A2 peak is the protein shell.
    lipid_peaks, _ = find_peaks(rdf_lipids["rdf"], width = 20)
    a2_peaks, _ = find_peaks(rdf_a2["rdf"], width = 20)
    lower_leaf_rdf = rdf_lipids["rdf"][lipid_peaks[0]]
    lower_leaf_rad = rdf_lipids["bins"][lipid_peaks[0]]
    upper_leaf_rdf = rdf_lipids["rdf"][lipid_peaks[1]]
    upper_leaf_rad = rdf_lipids["bins"][lipid_peaks[1]]
    a2_rdf = rdf_a2["rdf"][a2_peaks[0]]
    a2_rad = rdf_a2["bins"][a2_peaks[0]]
    # Height of the A2 peak above the outer leaflet (Å), rounded for the
    # on-plot annotation.
    a2_vesi_dist = np.round(a2_rad - upper_leaf_rad, 2)
    fig, ax = plt.subplots(figsize = (17, 4))
    ax.plot(rdf_lipids["bins"], rdf_lipids["rdf"])
    ax.plot(rdf_a2["bins"], rdf_a2["rdf"])
    # BUG FIX: the label previously contained the mojibake "Ã…" (a UTF-8
    # "Å" re-decoded as Latin-1); restore the intended Ångström symbol.
    ax.set_xlabel("Distance from Vesicle Center (Å)")
    ax.set_ylabel("g(r)")
    # Dashed guide lines at each peak, plus a horizontal connector whose
    # length is the annotated A2-vesicle distance.
    ax.plot((lower_leaf_rad, lower_leaf_rad), (0., lower_leaf_rdf),
            linestyle = "--", color = 'k')
    ax.plot((upper_leaf_rad, upper_leaf_rad), (0., upper_leaf_rdf),
            linestyle = "--", color = 'k')
    ax.plot((a2_rad, a2_rad), (0., a2_rdf), linestyle = "--", color = 'k')
    ax.plot((upper_leaf_rad, a2_rad), (a2_rdf, a2_rdf),
            linestyle = "--", color = 'k')
    ax.text(a2_rad - 17.2, a2_rdf + 0.25, str(a2_vesi_dist))
    # Legend lines
    vesicle_line = mlines.Line2D([], [], color = "tab:blue", label = "Vesicle")
    a2_line = mlines.Line2D([], [], color = "tab:orange", label = "A2")
    ax.legend(handles = [vesicle_line, a2_line])
    plt.show()
def combined_coupled_rdf_plot(rdf_results):
    """Five-panel RDF figure: one lipid+A2 panel per trajectory with peak
    markers and the annotated A2-to-outer-leaflet distance (Å).

    Parameters
    ----------
    rdf_results : dict mapping "trajectory1".."trajectory5" to
        [rdf_lipids, rdf_a2] pairs of mappings with "bins"/"rdf" keys.
    """
    fig = plt.figure()
    # Adjust figure size.
    fig.set_size_inches(30, 21)
    # 4-column grid: two half-width panels on rows 0-1, one centered
    # panel (columns 1-3) on row 2.
    spec = mpl.gridspec.GridSpec(ncols=4, nrows=3)
    ax1 = fig.add_subplot(spec[0,0:2])
    ax2 = fig.add_subplot(spec[0,2:4])
    ax3 = fig.add_subplot(spec[1,0:2])
    ax4 = fig.add_subplot(spec[1,2:4])
    ax5 = fig.add_subplot(spec[2,1:3])
    axes = [ax1, ax2, ax3, ax4, ax5]
    for ii in range(0, 5):
        trajectory = "trajectory" + str(ii + 1)
        plot_data = rdf_results[trajectory]
        rdf_lipids = plot_data[0]
        rdf_a2 = plot_data[1]
        # Peaks >= 20 bins wide: two lipid peaks (inner/outer leaflet)
        # and the first A2 peak (protein shell).
        lipid_peaks, _ = find_peaks(rdf_lipids["rdf"], width = 20)
        a2_peaks, _ = find_peaks(rdf_a2["rdf"], width = 20)
        lower_leaf_rdf = rdf_lipids["rdf"][lipid_peaks[0]]
        lower_leaf_rad = rdf_lipids["bins"][lipid_peaks[0]]
        upper_leaf_rdf = rdf_lipids["rdf"][lipid_peaks[1]]
        upper_leaf_rad = rdf_lipids["bins"][lipid_peaks[1]]
        a2_rdf = rdf_a2["rdf"][a2_peaks[0]]
        a2_rad = rdf_a2["bins"][a2_peaks[0]]
        a2_vesi_dist = np.round(a2_rad - upper_leaf_rad, 2)
        axes[ii].plot(rdf_lipids["bins"], rdf_lipids["rdf"])
        axes[ii].plot(rdf_a2["bins"], rdf_a2["rdf"])
        # BUG FIX: restore the Ångström symbol (was the mojibake "Ã…").
        axes[ii].set_xlabel("Distance from Vesicle Center (Å)")
        axes[ii].set_ylabel("g(r)")
        axes[ii].yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%.1f'))
        # Dashed guides at each peak plus the A2-leaflet connector.
        axes[ii].plot((lower_leaf_rad, lower_leaf_rad), (0., lower_leaf_rdf),
                      linestyle = "--", color = 'k')
        axes[ii].plot((upper_leaf_rad, upper_leaf_rad), (0., upper_leaf_rdf),
                      linestyle = "--", color = 'k')
        axes[ii].plot((a2_rad, a2_rad), (0., a2_rdf), linestyle = "--", color = 'k')
        axes[ii].plot((upper_leaf_rad, a2_rad), (a2_rdf, a2_rdf),
                      linestyle = "--", color = 'k')
        axes[ii].text(a2_rad - 17.2, a2_rdf + 0.25, str(a2_vesi_dist))
    plt.tight_layout()
    plt.show()
13.2) Perform Calculations¶
trajectories = get_trajectories()
combined_rdf_results = analyze_trajectories(calculate_combined_rdf,
trajectories)
0%| | 0/11001 [00:00<?, ?it/s]
0%| | 0/11001 [00:00<?, ?it/s]
0%| | 0/11001 [00:00<?, ?it/s]
0%| | 0/11001 [00:00<?, ?it/s]
0%| | 0/11001 [00:00<?, ?it/s]
0%| | 0/11001 [00:00<?, ?it/s]
0%| | 0/11001 [00:00<?, ?it/s]
0%| | 0/11001 [00:00<?, ?it/s]
0%| | 0/11001 [00:00<?, ?it/s]
0%| | 0/11001 [00:00<?, ?it/s]
13.3) Save Results¶
# Save each trajectory's combined RDF to CSV so later sessions can skip
# the expensive InterRDF computation.
for key in combined_rdf_results:
    data = {}
    # g(r) curves for the lipids and for A2, plus the shared bin centers.
    data["RDF_lipids"] = combined_rdf_results[key][0].results.rdf
    data["RDF_a2"] = combined_rdf_results[key][1].results.rdf
    data["Location"] = combined_rdf_results[key][0].results.bins
    data = pd.DataFrame.from_dict(data)
    data.to_csv("analysis/rdf/combined/" + key + ".csv", index = False)
13.3) Read Results¶
# Read back the combined RDF results saved above.  Columns are addressed
# by name (RDF_lipids, RDF_a2, Location) rather than by position, so the
# code no longer silently breaks if the CSV column order ever changes.
prefix = "analysis/rdf/combined/"
combined_rdf_results = {}
for ii in range(1, 6):
    traj_name = "trajectory" + str(ii)
    data = pd.read_csv(prefix + traj_name + ".csv")
    # Mirror the [rdf_lipids, rdf_a2] structure produced by the InterRDF
    # calculation, but as plain dicts of lists.
    combined_rdf_results[traj_name] = [
        {"bins": list(data["Location"]), "rdf": list(data["RDF_lipids"])},
        {"bins": list(data["Location"]), "rdf": list(data["RDF_a2"])},
    ]
13.4) Present Results¶
combined_coupled_rdf_plot(combined_rdf_results)
present_analysis(present_coupled_rdf, combined_rdf_results)
The following plots correcpond to: trajectory1
The following plots correcpond to: trajectory2
The following plots correcpond to: trajectory3
The following plots correcpond to: trajectory4
The following plots correcpond to: trajectory5
13.5) Average Height¶
# Mean A2 height above the outer leaflet, averaged over the five
# per-trajectory peak distances, with the standard error of the mean.
heights = [27.26, 26.68, 26.45, 25.42, 26.57]
mean = np.mean(heights)
std = np.std(heights)
stderr_x2 = np.std(heights) / np.sqrt(5.)
print("The mean height is:", mean, '±', stderr_x2)
The mean height is: 26.476 ± 0.26690222929005286
Overflow¶
These didn't make it into the final manuscript but there were some fun ideas in here. I came up with the lipid entropy idea after taking a machine learning course. I thought the protein orientation one was also interesting but probably better applied to another system.
4) Estimating the Vesicle Radius¶
4.1) Reinitialize the Data¶
# Load trajectory
u = mda.Universe('reimage/image.tpr', 'reimage/reimaged.xtc')
# Create selections for groups that may be used for analysis
# (head-group beads for each phospholipid species).
selection = "(resname POPC and name PO4 NC3) or " + \
            "(resname POPS and name PO4 CNO) or " + \
            "(resname POP2 and name P1 P2 C1 C2 C3 PO4)"
head_group = u.select_atoms(selection)
# Create selections for groups that may be used for analysis
bilayer = u.select_atoms("resname POPC or resname POPS or " + \
                         "resname POP2 or resname CHOL")
4.2) Collect Center of Mass for Each Head Group¶
# Per-residue head-group COMs, one row per sampled frame.
# NOTE(review): the array is sized with n_frames / 100 but the loop below
# strides by `frame_skip` (defined earlier in the notebook) -- this only
# lines up if frame_skip == 100; confirm.
head_group_com = np.empty((int(u.trajectory.n_frames / 100) + 1, head_group.n_residues, 3))
ii = 0
for ts in u.trajectory[::frame_skip]:
    # Check progress
    if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
    # Shift into the bilayer-COM frame so the vesicle is centered at origin.
    head_group.positions = head_group.positions - bilayer.center_of_mass()
    head_group_com[ii, :] = head_group.center_of_mass(compound = 'residues')
    ii += 1
print(' ')
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
4.3) Split Data into Upper and Lower Leaflets¶
# Get the magnitude of the COM vectors
dist_from_center = np.sqrt(np.sum(head_group_com[0, :, :] * head_group_com[0, :, :], axis = 1))
dist_from_center.sort()
# The radii should be bimodal with a healthy gap between the two
# distributions (inner vs outer leaflet).  The largest jump between
# consecutive sorted radii marks where that gap sits.
diff = dist_from_center[1:] - dist_from_center[0:-1]
# Index just past the largest gap = first outer-leaflet lipid.  This
# replaces the previous manual scan-and-break loop (with its stray
# ii += 1 increments) and yields the identical index.
lower_bound = int(np.argmax(diff)) + 1
# Use the determined lower-bound for the upper leaflet lipids to
# generate a list for selecting the lipids of the upper leaflet
upper_leaflet_min = dist_from_center[lower_bound]
# Re-compute radii in the original (unsorted) residue order -- the sort
# above was in place.
dist_from_center = np.sqrt(np.sum(head_group_com[0, :, :] * head_group_com[0, :, :], axis = 1))
upper_selection = dist_from_center >= upper_leaflet_min
upper_selection = [ii for ii in range(len(upper_selection)) if upper_selection[ii]]
lower_selection = dist_from_center < upper_leaflet_min
lower_selection = [ii for ii in range(len(lower_selection)) if lower_selection[ii]]
# Define the upper-leaflet lipids
upper_head_group_com = head_group_com[:, upper_selection, :]
lower_head_group_com = head_group_com[:, lower_selection, :]
# Histogram of head-group radii for the first frame: should show two
# clearly separated modes (inner and outer leaflet).
# Initialize plotting.
fig, ax = plt.subplots()
# Remove top and right spines from the figure.
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
# Adjust figure size.
#fig.set_size_inches(15, 4)
# Plot data.
ax.hist(dist_from_center)
# Set labels
# BUG FIX: restore the Ångström symbol (was the mojibake "Ã…").
ax.set_xlabel("Distance from Center (Å)")
ax.set_ylabel("Count")
plt.show()
4.4) Calculate Average Radius Per Frame¶
# Per-frame mean radius of each leaflet (Å) and the radial distance
# between them ("vesicle thickness").
upper_leaf_radius = []
lower_leaf_radius = []
ii = 0
for ts in u.trajectory[::frame_skip]:
    # Check progress
    if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
    # Mean distance of upper-leaflet head groups from the vesicle center.
    radius = np.linalg.norm(upper_head_group_com[ii, :, :], axis = 1)
    radius = np.mean(radius)
    upper_leaf_radius.append(radius)
    # Same for the lower leaflet.
    radius = np.linalg.norm(lower_head_group_com[ii, :, :], axis = 1)
    radius = np.mean(radius)
    lower_leaf_radius.append(radius)
    ii += 1
print(' ')
lower_leaf_radius = np.array(lower_leaf_radius)
upper_leaf_radius = np.array(upper_leaf_radius)
# Leaflet separation and the time averages used throughout later sections.
vesicle_thickness = upper_leaf_radius - lower_leaf_radius
average_lower_radius = np.mean(lower_leaf_radius)
average_upper_radius = np.mean(upper_leaf_radius )
average_thickness = np.mean(vesicle_thickness)
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
4.5) Average Outer Radius vs Time¶
plot_data = [upper_leaf_radius, lower_leaf_radius, vesicle_thickness]
plot_title = ["Upper Leaflet Radius", "Lower Leaflet Radius",
"Vesicle Thickness"]
plot_value = [round(average_upper_radius, 2),
round(average_lower_radius, 2),
round(average_thickness, 2)]
plot_coord = [(-0.3, 197.75), (-0.3, 157.55), (-0.3, 40.225)]
# Initialize plotting.
fig, axs = plt.subplots(3, 1, figsize=(25, 20))
for ii in range(len(plot_data)):
# Remove top and right spines from the figure.
axs[ii].spines["top"].set_visible(False)
axs[ii].spines["right"].set_visible(False)
axs[ii].plot(time, plot_data[ii])
axs[ii].axhline(plot_value[ii], color = "blue",linestyle = "--")
axs[ii].annotate(str(plot_value[ii]), xy = plot_coord[ii],
xytext = plot_coord[ii])
axs[ii].set_title(plot_title[ii])
if ii == 1:
axs[ii].set_ylabel("Avg Distance (Ã…)")
axs[ii].set_xlabel("Time (μs)")
plt.show()
print(average_upper_radius)
197.7944349368595
6) Lipid Pairwise Arc Length¶
6.1) Reinitialize the Data¶
# Load trajectory
u = mda.Universe('reimage/image.tpr', 'reimage/reimaged.xtc')
# Create selections for groups that may be used for analysis
selection = "(resname POPC and name PO4 NC3) or " + \
"(resname POPS and name PO4 CNO) or " + \
"(resname POP2 and name P1 P2 C1 C2 C3 PO4) or " + \
"(resname CHOL)"
head_group = u.select_atoms(selection)
# Create selections for groups that may be used for analysis
bilayer = u.select_atoms("resname POPC or resname POPS or " + \
"resname POP2 or resname CHOL")
6.2) Collect Lipid Center of Mass Data¶
# Per-lipid head-group COM trajectories, keyed by residue name.
lipid_head_com = {}
for lip in ["POPC", "POPS", "POP2", "CHOL"]:
    # Generate selection and np.array to store data.
    lipid_sele = head_group.select_atoms("resname " + lip)
    # One frame every `frame_skip` trajectory steps; +1 row covers the
    # final partial stride.  (The previous dead assignment of the bare
    # np.empty function object has been removed.)
    lipid_head_com[lip] = np.empty((int(u.trajectory.n_frames / frame_skip) + 1,
                                    lipid_sele.n_residues,
                                    3))
    ii = 0
    for ts in u.trajectory[::frame_skip]:
        # Check progress
        if ii % track_divisor == 0:
            print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
        # Shift into the bilayer-COM frame before taking per-residue COMs.
        lipid_sele.positions = lipid_sele.positions - bilayer.center_of_mass()
        lipid_head_com[lip][ii, :] = lipid_sele.center_of_mass(compound = 'residues')
        ii += 1
    print(' ')
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
6.3) Get Upper Leaflet Head Group COMs¶
for lip in ["POPC", "POPS", "POP2", "CHOL"]:
head_com = lipid_head_com[lip]
# Get the magnitude of the COM vectors
dist_from_center = np.sqrt(np.sum(head_com[0, :, :] * head_com[0, :, :], axis = 1))
# Use the average magnitude to seperate the distributions
upper_leaf_min = np.mean(dist_from_center)
upper_selection = dist_from_center > upper_leaf_min
dist_from_center = np.sqrt(np.sum(head_com[0, :, :] * head_com[0, :, :], axis = 1))
upper_selection = [ii for ii in range(len(upper_selection)) if upper_selection[ii]]
# Define the upper-leaflet lipids
lipid_head_com[lip] = head_com[:, upper_selection, :]
6.4) Function to Generate all Pairwise Combinations of Points¶
def numpy_pairwise_combinations(x):
    """Return every unordered pair of rows of `x` as an array of shape
    (n*(n-1)/2, 2, ...)."""
    row_i, row_j = np.triu_indices(len(x), k=1)
    return np.stack((x[row_i], x[row_j]), axis=1)
6.5) Calculate the Average Pairwise Distance¶
# Used to store arc-length results for each lipid.
arc_length_results = {}
for lip in lipid_head_com:
    arc_length_results[lip] = []
    ii = 0
    # NOTE(review): the trajectory is strided by 1000 here while the COM
    # arrays were filled with stride `frame_skip`, and `ii` advances by
    # 10 per iteration -- the 0%..1100% progress output suggests these
    # counters are inconsistent; confirm the intended frame indexing.
    for ts in u.trajectory[::1000]:
        # Check progress
        if ii % 21 == 0:
            #print(int(((ii / track_divisor)) * 10), "% ",
            print(int(((ii / 21)) * 10), "% ",
                  sep = '', end = '')
        frame = lipid_head_com[lip][ii, :, :]
        combinations = numpy_pairwise_combinations(frame)
        combinations
        # Angle between each COM pair from the normalized dot product,
        # converted to great-circle arc length on a sphere of the
        # average outer-leaflet radius.
        angles = np.sum(combinations[:, 0, :] * combinations[:, 1, :], axis = 1)
        angles = angles / \
                 (np.linalg.norm(combinations[:, 0, :], axis = 1) * \
                  np.linalg.norm(combinations[:, 1, :], axis = 1))
        angles = np.arccos(angles)
        arclengths = angles * average_upper_radius
        arc_length_results[lip].append(np.mean(arclengths))
        ii += 10
    print(' ')
0% 100% 200% 300% 400% 500% 600% 700% 800% 900% 1000% 1100% 0% 100% 200% 300% 400% 500% 600% 700% 800% 900% 1000% 1100% 0% 100% 200% 300% 400% 500% 600% 700% 800% 900% 1000% 1100% 0% 100% 200% 300% 400% 500% 600% 700% 800% 900% 1000% 1100%
6.6) Plot the Average Pairwise Distance¶
# Initialize plotting.
fig, axs = plt.subplots(4, 1, figsize = (15, 25))
ii = 0
for lip in arc_length_results:
# Remove top and right spines from the figure.
axs[ii].spines["top"].set_visible(False)
axs[ii].spines["right"].set_visible(False)
# Set y range
axs[ii].set_ylim(300.0, 314.0)
# Plot data.
axs[ii].plot(time[::10], arc_length_results[lip])
# Set labels
if ii == 0:
axs[ii].set_ylabel("Avg Pairwise Dist (Ã…)")
# Calculate equation for trendline
z = np.polyfit(time[::10], arc_length_results[lip], 1)
p = np.poly1d(z)
# Get r^2 of the fit and other statistics
slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(time[::10], arc_length_results[lip])
print(lip, slope)
# Add r^2 value to the plot
axs[ii].annotate(f"R² = {round(r_value, 2)}", xy = (0., 309.5),
xytext = (0., 309.5))
# Add trendline to plot
axs[ii].plot(time[::10], p(time[::10]), linestyle = "dashed",
color = "orange")
ii += 1
#axs[ii].set_xlabel("Time (us)")
plt.show()
POPC -0.003604287106444494 POPS -0.003312059177950976 POP2 -0.2804137219387939 CHOL -0.003930015608562447
7) Lipid Entropy¶
7.1) Reinitialize Data¶
# Load trajectory
u = mda.Universe('reimage/image.tpr', 'reimage/reimaged.xtc')
# Create selections for groups that may be used for analysis
selection = "(resname POPC and name PO4 NC3) or " + \
"(resname POPS and name PO4 CNO) or " + \
"(resname POP2 and name P1 P2 C1 C2 C3 PO4) or " + \
"(resname CHOL)"
head_group = u.select_atoms(selection)
# Create selections for groups that may be used for analysis
bilayer = u.select_atoms("resname POPC or resname POPS or " + \
"resname POP2 or resname CHOL")
7.2) Collect PIP2 Center of Mass Data¶
lipid_head_com = {}
#for lip in ["POPC", "POPS", "POP2", "CHOL"]:
#for lip in ["POPC"]:
for lip in ["POPC", "POPS", "POP2", "CHOL"]:
# Generate selection and np.array to store data.
lipid_sele = head_group.select_atoms("resname " + lip)
lipid_head_com[lip] = np.empty
lipid_head_com[lip] = np.empty((int(u.trajectory.n_frames / frame_skip) + 1,
lipid_sele.n_residues,
3))
ii = 0
for ts in u.trajectory[::frame_skip]:
# Check progress
if ii % track_divisor == 0:
print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
lipid_sele.positions = lipid_sele.positions - bilayer.center_of_mass()
lipid_head_com[lip][ii, :] = lipid_sele.center_of_mass(compound = 'residues')
ii += 1
print(' ')
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
7.3) Split Lipids by Leaflet¶
for lip in ["POPC", "POPS", "POP2", "CHOL"]:
head_com = lipid_head_com[lip]
# Get the magnitude of the COM vectors
dist_from_center = np.sqrt(np.sum(head_com[0, :, :] * head_com[0, :, :], axis = 1))
# Use the average magnitude to seperate the distributions
upper_leaf_min = np.mean(dist_from_center)
upper_selection = dist_from_center > upper_leaf_min
dist_from_center = np.sqrt(np.sum(head_com[0, :, :] * head_com[0, :, :], axis = 1))
upper_selection = [ii for ii in range(len(upper_selection)) if upper_selection[ii]]
# Define the upper-leaflet lipids
lipid_head_com[lip] = head_com[:, upper_selection, :]
7.4) Function to Evenly Distribute Points on a Sphere¶
def fibonacci_sphere(samples=1000):
    """Return `samples` points approximately evenly distributed on the
    unit sphere via the golden-angle (Fibonacci) spiral.

    Parameters
    ----------
    samples : int
        Number of points to generate.

    Returns
    -------
    np.ndarray of shape (samples, 3); every row has unit norm.
    """
    # BUG FIX: samples == 1 previously raised ZeroDivisionError from the
    # division by (samples - 1); return the single pole point instead.
    # samples <= 0 still yields an empty array, as before.
    if samples <= 1:
        return np.array([[0.0, 1.0, 0.0]][:max(samples, 0)])
    points = []
    phi = math.pi * (3. - math.sqrt(5.))  # golden angle in radians
    for i in range(samples):
        y = 1 - (i / float(samples - 1)) * 2  # y goes from 1 to -1
        radius = math.sqrt(1 - y * y)  # radius of the circle at height y
        theta = phi * i  # golden angle increment
        x = math.cos(theta) * radius
        z = math.sin(theta) * radius
        points.append(np.array([x, y, z]))
    points = np.array(points)
    return points
7.5) Calculate the Entropy¶
from sklearn.neighbors import NearestNeighbors
# Spatial (Shannon) entropy of each lipid species over the vesicle
# surface: the sphere is partitioned into roughly equal-area bins via a
# Fibonacci lattice (one bin per "ideal cluster" of the species) and the
# entropy of the bin-occupancy distribution is tracked over time.
vesicle_surface_area = 4 * np.pi * (average_upper_radius ** 2)
number_lipids = len(lipid_head_com["POPC"][0, :, :]) + \
                len(lipid_head_com["POPS"][0, :, :]) + \
                len(lipid_head_com["POP2"][0, :, :]) + \
                len(lipid_head_com["CHOL"][0, :, :])
area_per_lipid = vesicle_surface_area / number_lipids
entropy_results = {}
for lip in lipid_head_com:
    entropy_results[lip] = []
    number_lipids = len(lipid_head_com[lip][0, :, :])
    # Area this species would cover if perfectly clustered; its ratio to
    # the total surface sets the number of lattice bins.
    one_cluster_area = len(lipid_head_com[lip][0, :, :]) * \
                       area_per_lipid
    num_bins = int(np.round(vesicle_surface_area / \
                            one_cluster_area))
    print(lip, ': ', num_bins)
    uniform_sphere = fibonacci_sphere(samples = num_bins)
    groups = np.arange(0, len(uniform_sphere), 1)
    neigh = NearestNeighbors(n_neighbors = 1)
    # NOTE(review): NearestNeighbors.fit is unsupervised -- sklearn
    # ignores the second argument (groups); kneighbors below returns
    # lattice-point indices directly.
    neigh.fit(uniform_sphere, groups)
    ii = 0
    for ts in u.trajectory[::frame_skip]:
        # Check progress
        if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
        # Project this frame's head-group COMs onto the unit sphere.
        frame = lipid_head_com[lip][ii, :, :]
        frame_norm = np.linalg.norm(frame, axis = 1)
        frame = frame / frame_norm[:, None]
        frame
        # Assign each lipid to its nearest lattice bin and count the
        # occupancy of every non-empty bin.
        dist, group = neigh.kneighbors(frame)
        unique, counts = np.unique(group, return_counts=True)
        counts = np.array(list(dict(zip(unique, counts)).values()))
        # Shannon entropy (natural log) of the occupancy distribution.
        p = counts / number_lipids
        log_p = np.log(p)
        entropy = -1. * np.sum(p * log_p)
        entropy_results[lip].append(entropy)
        ii += 1
    print(' ')
POPC : 2 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% POPS : 5 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% POP2 : 12 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% CHOL : 10 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
# Initialize plotting.
fig, axs = plt.subplots(3, 1, figsize = (15, 25))
ii = 0
#for lip in entropy_results:
for lip in ["POPS", "POP2", "CHOL"]:
# Remove top and right spines from the figure.
axs[ii].spines["top"].set_visible(False)
axs[ii].spines["right"].set_visible(False)
# Set y range
#axs[ii].set_ylim(310.0, 312.0)
# Plot data.
axs[ii].plot(time, entropy_results[lip])
# Set labels
if ii == 1:
axs[ii].set_ylabel("Avg Pairwise Dist (Ã…)")
# Calculate equation for trendline
z = np.polyfit(time, entropy_results[lip], 1)
p = np.poly1d(z)
# Get r^2 of the fit and other statistics
slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(time, entropy_results[lip])
print(lip, slope, r_value ** 2)
# Add r^2 value to the plot
axs[ii].annotate(f"R² = {round(r_value, 2)}", xy = (0., 309.5),
xytext = (0., 309.5))
# Add trendline to plot
axs[ii].plot(time, p(time), linestyle = "dashed",
color = "orange")
#if ii == 2:
ii += 1
#axs[ii].set_xlabel("Time (us)")
plt.show()
POPS 1.2979219925472855e-05 0.0025050808607435648 POP2 -0.0019054516556743983 0.4351049078286086 CHOL 0.00018085970378770206 0.05351522879602239
7) Protein Pairwise Arc Length¶
7.1) Reinitialize the Data¶
# Load trajectory
u = mda.Universe('reimage/image.tpr', 'reimage/reimaged.xtc')
# Create selections for groups that may be used for analysis
selection = "(not resname POPC) and " + \
"(not resname POPS) and " + \
"(not resname POP2) and " + \
"(not resname CHOL)"
protein = u.select_atoms(selection)
# Create selections for groups that may be used for analysis
selection = "resname POPC or resname POPS or " + \
"resname POP2 or resname CHOL"
bilayer = u.select_atoms(selection)
7.2) Collect Protein Center of Mass Data¶
protein_com = np.empty((int(u.trajectory.n_frames / frame_skip) + 1, 10, 3))
ii = 0
for ts in u.trajectory[::frame_skip]:
# Check progress
if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
protein.positions = protein.positions - bilayer.center_of_mass()
protein_com[ii, :] = protein.center_of_mass(compound = 'molecules')
ii += 1
print(' ')
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
7.3) Function to Generate all Pairwise Combinations of Points¶
def numpy_pairwise_combinations(x):
    """All unordered row pairs of `x`, shaped (n_pairs, 2, ...).

    Note: this re-definition is identical to the one in section 6.4.
    """
    pair_idx = np.transpose(np.triu_indices(len(x), k=1))
    return x[pair_idx]
7.4) Calculate the Average Pairwise Distance¶
# Mean pairwise great-circle (arc-length) separation between the protein
# COMs at each sampled frame.
avg_arclength_protein = []
ii = 0
for ts in u.trajectory[::frame_skip]:
    # Check progress
    if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
    frame = protein_com[ii, :, :]
    combinations = numpy_pairwise_combinations(frame)
    combinations
    # cos(angle) from the normalized dot product of each COM pair, then
    # arc length on a sphere of the average outer-leaflet radius.
    angles = np.sum(combinations[:, 0, :] * combinations[:, 1, :], axis = 1)
    angles = angles / (np.linalg.norm(combinations[:, 0, :], axis = 1) * np.linalg.norm(combinations[:, 1, :], axis = 1))
    angles = np.arccos(angles)
    arclengths = angles * average_upper_radius
    avg_arclength_protein.append(np.mean(arclengths))
    ii += 1
print(' ')
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
7.5) Plot the Average Pairwise Distance¶
# Initialize plotting.
fig, ax = plt.subplots()
# Remove top and right spines from the figure.
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
# Adjust figure size.
fig.set_size_inches(15, 4)
# Plot data.
ax.plot(time[550:], avg_arclength_protein[550:])
#ax.set_xlim([5.0, 18.8])
#ax.set_ylim([290, 330])
# Set labels
ax.set_xlabel("Time (us)")
ax.set_ylabel("Avg Pairwise Dist (Ã…)")
# Calculate equation for trendline
z = np.polyfit(time[550:], avg_arclength_protein[550:], 1)
p = np.poly1d(z)
slope, intercept, r_value, p_value, std_err = \
scipy.stats.linregress(time[550:], avg_arclength_protein[550:])
# Add trendline to plot
ax.plot(time, p(time), linestyle = "dashed",color = "orange")
ax.annotate("R² = " + str(round(r_value, 2)), xy = (5.6, 315.),
xytext = (5.6, 305.))
plt.show()
slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(time[550:], avg_arclength_protein[550:])
print("The slope is: ", slope)
print("The r-squared value is: %f" % (r_value * r_value))
print("The p-value is: %f" % p_value)
The slope is: -4.497303994825977 The r-squared value is: 0.751070 The p-value is: 0.000000
8) Protein-Lipid Pairwise Arc-Length Relationship¶
# Initialize plotting.
fig, ax = plt.subplots()
# Remove top and right spines from the figure.
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
# Adjust figure size.
fig.set_size_inches(9, 4)
# Plot data.
ax.scatter(avg_arclength_protein[550:], avg_arclength_pop2[550:])
# Set labels
ax.set_xlabel("Time (us)")
ax.set_ylabel("Avg Pairwise Dist (Ã…)")
# Calculate equation for trendline
z = np.polyfit(avg_arclength_protein[550:], avg_arclength_pop2[550:], 1)
p = np.poly1d(z)
# Add trendline to plot
ax.plot(avg_arclength_protein[550:], p(avg_arclength_protein[550:]), color = "orange")
plt.show()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Input In [33], in <cell line: 12>() 9 fig.set_size_inches(9, 4) 11 # Plot data. ---> 12 ax.scatter(avg_arclength_protein[550:], avg_arclength_pop2[550:]) 14 # Set labels 15 ax.set_xlabel("Time (us)") NameError: name 'avg_arclength_pop2' is not defined
slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(avg_arclength_protein[550:], avg_arclength_pop2[550:])
print("The slope is: ", slope)
print("The r-squared value is: %f" % (r_value * r_value))
print("The p-value is: %f" % p_value)
9) Lipid Diffusion Coefficients¶
Re-Initialize the Data¶
# Load trajectory
u = mda.Universe('reimage/image.tpr', 'reimage/reimaged.xtc')
# Create selections for groups that may be used for analysis
bilayer = u.select_atoms("resname POPC or resname POPS or " + \
"resname POP2 or resname CHOL")
Collect Bilayer Coordinates¶
bilayer_coms = np.empty((int(u.trajectory.n_frames / frame_skip) + 1, bilayer.n_residues, 3))
ii = 0
for ts in u.trajectory[::frame_skip]:
# Check progress
if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
bilayer.positions = bilayer.positions - bilayer.center_of_mass()
bilayer_coms[ii, :] = bilayer.center_of_mass(compound = 'residues')
ii += 1
print(' ')
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
len(bilayer_coms)
2393
Calculate MSD using Arc Length¶
# Used to collect MSD data
# NOTE(review): this dict shadows the MDAnalysis.analysis.msd module
# imported at the top of the notebook.
msd = {"POPC" : [], "POPS" : [], "POP2" : [], "CHOL" : [], "ALL" : []}
# Generate selection indicies for each of the individual lipids
# (resnums are 1-based; the COM array rows are 0-based, hence the -1).
popc_sele = bilayer.atoms[bilayer.atoms.resnames == "POPC"].resnums
popc_sele = np.unique(popc_sele - 1)
pops_sele = bilayer.atoms[bilayer.atoms.resnames == "POPS"].resnums
pops_sele = np.unique(pops_sele - 1)
pop2_sele = bilayer.atoms[bilayer.atoms.resnames == "POP2"].resnums
pop2_sele = np.unique(pop2_sele - 1)
chol_sele = bilayer.atoms[bilayer.atoms.resnames == "CHOL"].resnums
chol_sele = np.unique(chol_sele - 1)
# Progress tracker
ii = 0
for ts in u.trajectory[::frame_skip]:
    # Check progress
    if ii % track_divisor == 0: print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
    # Get info for distance calc
    # NOTE(review): the reference frame is hard-coded to index 1500, not
    # the first frame -- displacement is measured relative to that
    # snapshot; confirm this is intentional.
    ref_frame = bilayer_coms[1500, :, :]
    frame = bilayer_coms[ii, :, :]
    # Calculate arc length with reference to initial position
    angles = np.sum(ref_frame * frame, axis = 1)
    angles = angles / (np.linalg.norm(frame, axis = 1) * np.linalg.norm(ref_frame, axis = 1))
    # Rounding can push the cosine slightly outside [-1, 1], producing
    # the RuntimeWarning / NaN seen in the captured output; clipping
    # before arccos would avoid it.
    angles = np.arccos(angles)
    arclengths = angles * average_upper_radius
    # Calculate the MSD for all lipids as specific lipids
    msd["ALL"].append(np.mean(arclengths * arclengths))
    msd["POPC"].append(np.mean(arclengths[popc_sele] * \
                               arclengths[popc_sele]))
    msd["POPS"].append(np.mean(arclengths[pops_sele] * \
                               arclengths[pops_sele]))
    msd["POP2"].append(np.mean(arclengths[pop2_sele] * \
                               arclengths[pop2_sele]))
    msd["CHOL"].append(np.mean(arclengths[chol_sele] * \
                               arclengths[chol_sele]))
    ii += 1
print(' ')
# Change the 'nan' msd calculation to
# zero.  NOTE(review): with the reference at index 1500, the NaN from
# self-comparison appears at element 1500, not element 0 -- these
# assignments may target the wrong index; confirm.
msd["POPC"][0] = 0.
msd["POPS"][0] = 0.
msd["POP2"][0] = 0.
msd["CHOL"][0] = 0.
msd["ALL"][0] = 0.
# Convert lists to np arrays
msd["POPC"] = np.array(msd["POPC"])
msd["POPS"] = np.array(msd["POPS"])
msd["POP2"] = np.array(msd["POP2"])
msd["CHOL"] = np.array(msd["CHOL"])
msd["ALL"] = np.array(msd["ALL"])
0% 10% 20% 30% 40% 50% 60%
/tmp/ipykernel_151103/3090153956.py:27: RuntimeWarning: invalid value encountered in arccos angles = np.arccos(angles)
70% 80% 90% 100%
fig, ax = plt.subplots()
#ax.set_xlim([10 ** 0,10 ** 6])
#ax.set_ylim([10 ** 0,10 ** 6])
ax.plot(time[1000:1100] * 1000., msd["POPC"][1000:1100], label = "POPC")
ax.plot(time[1000:1100] * 1000., msd["POPS"][1000:1100], label = "POPS")
ax.plot(time[1000:1100] * 1000., msd["POP2"][1000:1100], label = "POP2")
ax.plot(time[1000:1100] * 1000., msd["CHOL"][1000:1100], label = "CHOL")
ax.plot(time[1000:1100] * 1000., msd["ALL"][1000:1100], label = "ALL")
#ax.axvline(100., color = "red",linestyle = "--")
#ax.axvline(2500., color = "red",linestyle = "--")
ax.set_xlabel("Time (ns)")
ax.set_ylabel("Mean Squared\nDisplacement (Ų)")
ax.legend()
plt.show()
Check for Linear Region of Log Plot¶
fig, ax = plt.subplots()
ax.set_xlim([10 ** 0,10 ** 6])
ax.set_ylim([10 ** 0,10 ** 6])
ax.loglog(time[1:] * 1000., msd["POPC"][1:], label = "POPC")
ax.loglog(time[1:] * 1000., msd["POPS"][1:], label = "POPS")
ax.loglog(time[1:] * 1000., msd["POP2"][1:], label = "POP2")
ax.loglog(time[1:] * 1000., msd["CHOL"][1:], label = "CHOL")
ax.loglog(time[1:] * 1000., msd["ALL"][1:], label = "ALL")
ax.axvline(100., color = "red",linestyle = "--")
ax.axvline(2500., color = "red",linestyle = "--")
ax.set_xlabel("Time (ns)")
ax.set_ylabel("Mean Squared\nDisplacement (Ų)")
ax.legend()
plt.show()
Plot MSD with Trendline for Selected Region¶
#slope, intercept, r_value, p_value, std_err = \
#scipy.stats.linregress(time[lower_bound:upper_bound],
# msd[current_key][lower_bound:upper_bound])
time[lower_bound:upper_bound]
msd[current_key][lower_bound:upper_bound]
array([ nan, 280.06406534, 583.65668546, 816.04020584,
1160.667803 , 1443.29150359, 1771.9387194 , 2127.31425819,
2492.43716227, 2669.20407497, 2878.33354155, 3279.87212495,
3532.89234283, 3967.59264187, 4192.77985047, 4289.01416178,
4930.23360422, 5278.8645885 , 5408.08195404, 5739.30960802,
5985.18602883, 6579.54776716, 6930.68218801, 6881.98175111,
6905.29461471, 7286.99399176, 7275.4894873 , 7655.39546011,
8100.35014169, 8517.80986959, 9229.22802286, 9507.88342198,
9391.09243177, 9769.91707169, 9903.30571437, 10320.87419198,
10497.44061686, 11127.85858315, 11691.93110884, 11631.31567832,
12133.38798729, 11895.61031238, 12327.87113712, 11791.58101714,
12049.87881323, 12162.35067832, 12407.73324873, 12637.00144672,
12947.56091774, 12973.3899526 , 12935.06393963, 13078.72944584,
13470.2450592 , 13551.60587073, 13499.61964139, 13739.79629186,
14080.80555569, 14361.26167888, 14705.22881079, 14756.34082044,
14963.49925294, 15338.59148305, 15568.45487487, 15660.22166779,
15917.19097419, 16232.4183077 , 16663.5394359 , 16873.50587676,
17132.84066933, 17679.38672097, 17987.86015596, 18436.56878276,
18526.96330888, 18901.4453793 , 19465.50019425, 19982.09662542,
20160.32426309, 20784.13187344, 21110.27005857, 21302.51357703,
21682.53090061, 21818.69168877, 21839.27697413, 21891.78236872,
21676.77867584, 22267.99183759, 22543.82595131, 22697.06126018,
22395.73117373, 22546.9058549 , 22385.14086497, 22712.54989915,
23016.30378819, 23299.67508273, 23699.94374579, 23708.86942405,
24261.99146611, 24340.1150816 , 24610.5859035 , 24898.46218483])
# Determine index for selection of data
#lower_sele = (time * 1000.) > 100
#upper_sele = (time * 1000.) < 10 ** 6
#
#lower_sele = [ii for ii in range(len(selection)) if lower_sele[ii]]
#upper_sele = [ii for ii in range(len(selection)) if upper_sele[ii]]
#lower_bound = np.min(lower_sele)
#upper_bound = np.max(upper_sele)
# Hard-coded frame-index window used for the linear (diffusive-regime) fit.
# NOTE(review): indices 1501-1600 were chosen manually — confirm they match
# the intended lag-time window for the MSD fit.
lower_bound = 1501
upper_bound = 1600
# Species keys into the msd dict and the matching plot colors (same order).
msd_keys = ["POPC", "POPS", "POP2", "CHOL", "ALL"]
plot_colors = ["tab:blue", "tab:orange", "tab:red",
"tab:green", "tab:purple"]
# Initialize plotting.
fig, ax = plt.subplots(figsize = (15, 5))
for ii in range(len(msd_keys)):
current_key = msd_keys[ii]
# Plot data.
ax.plot(time[lower_bound:upper_bound],
msd[current_key][lower_bound:upper_bound],
label = current_key, color = plot_colors[ii])
# Calculate equation for trendline
z = np.polyfit(time[lower_bound:upper_bound],
msd[current_key][lower_bound:upper_bound], 1)
p = np.poly1d(z)
# Add trendline to plot.  NOTE: this duplicates the legend label for each
# species; the custom legend handles below override the auto legend.
ax.plot(time[lower_bound:upper_bound],
p(time[lower_bound:upper_bound]),
label = current_key, color = plot_colors[ii],
linestyle = "dashed")
slope, intercept, r_value, p_value, std_err = \
scipy.stats.linregress(time[lower_bound:upper_bound],
msd[current_key][lower_bound:upper_bound])
# D = slope / 6 assumes 3-D diffusion (MSD = 6 D t); resulting units
# follow the time/MSD units used here — TODO confirm unit conversion.
print("The " + current_key + \
" diffusion coefficient is: %f" % (slope / 6.))
# Remove top and right spines from the figure.
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
# Set labels
ax.set_xlabel("Time (μs)")
ax.set_ylabel("MSD (Ų)")
# Legend lines (built manually to avoid duplicate solid/dashed entries).
popc_line = mlines.Line2D([], [], color = "tab:blue", label = "POPC")
pops_line = mlines.Line2D([], [], color = "tab:orange", label = "POPS")
pop2_line = mlines.Line2D([], [], color = "tab:red", label = "POP2")
chol_line = mlines.Line2D([], [], color = "tab:green", label = "CHOL")
all_line = mlines.Line2D([], [], color = "tab:purple", label = "ALL")
ax.legend(handles = [popc_line, pops_line, pop2_line,
chol_line, all_line])
plt.show()
The POPC diffusion coefficient is: 4049.415038 The POPS diffusion coefficient is: 3948.195776 The POP2 diffusion coefficient is: 3053.942487 The CHOL diffusion coefficient is: 5204.770923 The ALL diffusion coefficient is: 4088.231257
# Report fit statistics.  These variables hold the values from the LAST
# loop iteration above, i.e. the "ALL" fit.
print("The slope is: %f" % slope)
print("The r-squared value is: %f" % (r_value * r_value))
print("The p-value is: %f" % p_value)
The slope is: 25374.713601 The r-squared value is: 0.975179 The p-value is: 0.000000
Protein Orientation¶
Reinitialize Data¶
# Reload the trajectory for the protein-orientation analysis.
u = mda.Universe('reimage/image.tpr', 'reimage/reimaged.xtc')

# Protein atoms: everything that is not one of the four lipid species.
selection = ("(not resname POPC) and "
             "(not resname POPS) and "
             "(not resname POP2) and "
             "(not resname CHOL)")
protein = u.select_atoms(selection)

# Bilayer atoms: all four lipid species.
selection = ("resname POPC or resname POPS or "
             "resname POP2 or resname CHOL")
bilayer = u.select_atoms(selection)
Generate Selections for Individual A2s¶
# Monomer (A2 subunit) indices used for contact analysis.
#              A  B  C  D  E  F  G  H  I  J
protein_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# One boolean mask per monomer: True where a protein resid lies inside that
# monomer's 308-residue window (12785 + 308*pid ... 13092 + 308*pid).
id_sele = {
    pid: np.logical_and(protein.resids >= (12785 + (pid * 308)),
                        protein.resids <= (13092 + (pid * 308)))
    for pid in protein_ids
}
id_sele
{0: array([ True, True, True, ..., False, False, False]),
1: array([False, False, False, ..., False, False, False]),
2: array([False, False, False, ..., False, False, False]),
3: array([False, False, False, ..., False, False, False]),
4: array([False, False, False, ..., False, False, False]),
5: array([False, False, False, ..., False, False, False]),
6: array([False, False, False, ..., False, False, False]),
7: array([False, False, False, ..., False, False, False]),
8: array([False, False, False, ..., False, False, False]),
9: array([False, False, False, ..., True, True, True])}
TEST: Calculate Principal Axes¶
Process for Calculating Angle of Orientation
- Translate system such that the vesicle center of mass is at the origin.
- Define $v_{1}$ and $v_{3}$ using the
principal_axes method of MDAnalysis. - Define and normalize $v_{com}$ using the center of mass of the A2 subunit.
- Ensure $v_{1}$ and $v_{3}$ are oriented properly.
- $v_{1}$ should be closer to ASP-298-SC1 than ASP-123-SC1.
- $v_{3}$ should be closer to ARG-195-SC2 than ARG-77-SC2.
- NOTE: Add each $v$ to the A2 COM prior to checking distance.
- NOTE: Correct the vector by multiplying by -1.
- Align $v_{3}$ to the x-axis.
- NOTE: Perform same rotations on the other vectors.
- Rotate $v_{1}$ about the x-axis until it is aligned with the y-axis.
- NOTE: Perform the same rotation on the remaining vectors ($v_{3}$ and $v_{com}$).
- Project $v_{com}$ onto the xy-plane by taking the x and y coordinates.
- Rotate $v_{com}$ about the z-axis until it is aligned with the y-axis.
- Calculate the directional angle between $v_{1}$ and the y-axis.
import geometry_analysis as ga

# Single-frame test of the orientation-angle procedure described above,
# applied to monomer 0.
ii = 0
a2_selection = id_sele[ii]

# 1) Translate system such that vesicle is at the center
protein.atoms.positions = protein.atoms.positions - \
                          bilayer.center_of_mass()

# 2) Get the principal axes of the monomer.
axes = protein.atoms[a2_selection].principal_axes()
v1 = axes[0, :]
v3 = axes[2, :]

# 3) Define and normalize a vector using the A2 COM
vcom = protein.atoms[a2_selection].center_of_mass()
vcom = vcom / np.linalg.norm(vcom)

# 4) Check v1 and v3 orientation; correct, if needed.
a2_com = protein.atoms[a2_selection].center_of_mass()
# Reference side-chain beads (the -31 offset presumably maps sequence
# numbering onto topology resnums — TODO confirm).
selection = "resnum "
selection = selection + str(12785 + (ii * 308) + 298 - 31)
selection = selection + " and name SC1"
v1_atom_correct = u.select_atoms(selection).atoms.positions[0]
selection = "resnum "
selection = selection + str(12785 + (ii * 308) + 123 - 31)
selection = selection + " and name SC1"
v1_atom_wrong = u.select_atoms(selection).atoms.positions[0]
selection = "resnum "
selection = selection + str(12785 + (ii * 308) + 195 - 31)
selection = selection + " and name SC2"
v3_atom_correct = u.select_atoms(selection).atoms.positions[0]
selection = "resnum "
selection = selection + str(12785 + (ii * 308) + 77 - 31)
selection = selection + " and name SC2"
v3_atom_wrong = u.select_atoms(selection).atoms.positions[0]

# Compare atom distances; flip an axis when it points toward the wrong
# reference side chain.
correct_distance = np.linalg.norm((v1 + a2_com) - v1_atom_correct)
wrong_distance = np.linalg.norm((v1 + a2_com) - v1_atom_wrong)
if correct_distance >= wrong_distance:
    v1 = v1 * -1.
correct_distance = np.linalg.norm((v3 + a2_com) - v3_atom_correct)
wrong_distance = np.linalg.norm((v3 + a2_com) - v3_atom_wrong)
if correct_distance >= wrong_distance:
    v3 = v3 * -1.

# 5) Align v3 to the x-axis; apply the same rotation to the other vectors.
rot_mat = ga.rotation_matrix_from_vectors(v3, np.array([1., 0., 0.]))
v3 = rot_mat.dot(v3)
v1 = rot_mat.dot(v1)
# BUG FIX: rotate vcom.  The original wrote "rot_mat = rot_mat.dot(vcom)",
# which destroyed the rotation matrix and left vcom un-rotated through
# every alignment step.
vcom = rot_mat.dot(vcom)

# 6) Align v1 with the y-axis; rotate v3 and vcom along with it.
rot_mat = ga.rotation_matrix_from_vectors(v1, np.array([0., 1., 0.]))
v3 = rot_mat.dot(v3)
v1 = rot_mat.dot(v1)
vcom = rot_mat.dot(vcom)

# 7) Project vcom onto the xy plane
vcom = np.array([vcom[0], vcom[1], 0.])

# 8) Align vcom with the y-axis; rotate v1 with it.
rot_mat = ga.rotation_matrix_from_vectors(vcom, np.array([0., 1., 0.]))
v1 = rot_mat.dot(v1)
vcom = rot_mat.dot(vcom)

# 9) Directional angle between the y-axis and v1 about the z-axis (radians).
ga.directional_angle([0., 1., 0.], v1, [0., 0., 1.])
1.4232829753661784
import geometry_analysis as ga

# Orientation angle of every A2 monomer over the associated portion of the
# trajectory (frame 80000 onward, every 100th frame).

# Collection of all sampled angles (radians), pooled over all monomers.
angles = []
# Collection of angles vs time for each monomer.
individual_angles = {}
# All monomers are associated by this point
lower_cut = 80000
upper_cut = 110000
for key in id_sele:
    a2_selection = id_sele[key]
    individual_angles[key] = []
    #for ts in u.trajectory[lower_cut:upper_cut:100]:
    for ts in u.trajectory[80000::100]:
        # 1) Translate system such that vesicle is at the center
        protein.atoms.positions = protein.atoms.positions - \
                                  bilayer.center_of_mass()
        # 2) Get the principal axes
        axes = protein.atoms[a2_selection].principal_axes()
        v1 = axes[0, :]
        v3 = axes[2, :]
        # 3) Define and normalize a vector using the A2 COM
        vcom = protein.atoms[a2_selection].center_of_mass()
        vcom = vcom / np.linalg.norm(vcom)
        # 4) Check v1 and v3 orientation; correct, if needed.
        a2_com = protein.atoms[a2_selection].center_of_mass()
        # BUG FIX: the reference atoms must belong to THIS monomer.  The
        # original used the stale index "ii" (left at 0 by an earlier
        # cell) instead of "key", so every monomer was checked against
        # monomer A's side chains.
        base = 12785 + (key * 308)
        # Reference side-chain beads (-31 maps sequence numbering onto
        # topology resnums — TODO confirm).
        selection = "resnum " + str(base + 298 - 31) + " and name SC1"
        v1_atom_correct = u.select_atoms(selection).atoms.positions[0]
        selection = "resnum " + str(base + 123 - 31) + " and name SC1"
        v1_atom_wrong = u.select_atoms(selection).atoms.positions[0]
        selection = "resnum " + str(base + 195 - 31) + " and name SC2"
        v3_atom_correct = u.select_atoms(selection).atoms.positions[0]
        selection = "resnum " + str(base + 77 - 31) + " and name SC2"
        v3_atom_wrong = u.select_atoms(selection).atoms.positions[0]
        # Compare atom distances; flip an axis when it points toward the
        # wrong reference side chain.
        correct_distance = np.linalg.norm((v1 + a2_com) - v1_atom_correct)
        wrong_distance = np.linalg.norm((v1 + a2_com) - v1_atom_wrong)
        if correct_distance >= wrong_distance:
            v1 = v1 * -1.
        correct_distance = np.linalg.norm((v3 + a2_com) - v3_atom_correct)
        wrong_distance = np.linalg.norm((v3 + a2_com) - v3_atom_wrong)
        if correct_distance >= wrong_distance:
            v3 = v3 * -1.
        # 5) Align v3 to the x-axis; rotate the other vectors with it.
        rot_mat = ga.rotation_matrix_from_vectors(v3, np.array([1., 0., 0.]))
        v3 = rot_mat.dot(v3)
        v1 = rot_mat.dot(v1)
        # BUG FIX: rotate vcom (the original overwrote rot_mat here,
        # leaving vcom un-rotated through every alignment step).
        vcom = rot_mat.dot(vcom)
        # 6) Align v1 with the y-axis; rotate v3 and vcom with it.
        rot_mat = ga.rotation_matrix_from_vectors(v1, np.array([0., 1., 0.]))
        v3 = rot_mat.dot(v3)
        v1 = rot_mat.dot(v1)
        vcom = rot_mat.dot(vcom)
        # 7) Project vcom onto the xy plane
        vcom = np.array([vcom[0], vcom[1], 0.])
        # 8) Align vcom with the y-axis; rotate v1 with it.
        rot_mat = ga.rotation_matrix_from_vectors(vcom, np.array([0., 1., 0.]))
        v1 = rot_mat.dot(v1)
        vcom = rot_mat.dot(vcom)
        # 9) Directional angle between the y-axis and v1 about z (radians).
        angles.append(ga.directional_angle([0., 1., 0.], v1, [0., 0., 1.]))
        individual_angles[key].append(angles[-1])

angles = np.array(angles)
#angles = angles * (180. / np.pi)
# Silence numpy floating-point warnings for the plotting cells below.
np.seterr(all = "ignore")
# One angle-vs-time panel per monomer; angles converted from rad to deg.
fig, axs = plt.subplots(10, 1, figsize=(25, 40))
for ii in range(len(individual_angles)):
angles = np.array(individual_angles[ii]) * 180. / np.pi
axs[ii].set_ylim(0, 360)
# NOTE(review): full_time is not defined in this section — presumably the
# per-frame time array from an earlier cell; confirm.  The label "POPC"
# looks like a copy-paste leftover from the lipid plots.
axs[ii].plot(full_time[80000::100], angles, label = "POPC")
axs[ii].set_title(str(ii))
handles, labels = axs[0].get_legend_handles_labels()
fig.legend(handles, labels, loc = "center right")
plt.show()
# Polar histogram of the pooled orientation angles (first attempt; the
# corrected version appears in the next cell).
N = 80            # number of angular bins/bars
bottom = 8        # inner radius offset of the bars
max_height = 4    # radial span of the tallest bar
fig, ax = plt.subplots(figsize=(25, 40))
# Histogram over the full circle; bar angles are the bin centers.
hist_data = np.histogram(angles, range = [0., np.pi * 2.], bins = 80)
theta = hist_data[1]
theta = (theta[1:] + theta[:-1]) / 2.
counts = hist_data[0]
radii = (counts / np.max(counts)) * max_height
width = (2*np.pi) / N
# NOTE(review): this discards the fig/ax created above and makes a new
# polar axes on the current figure.
ax = plt.subplot(111, polar=True)
bars = ax.bar(theta, radii, width=width, bottom=bottom)
#ax.xaxis("off")
#ax.axes.get_yaxis().set_visible(False)
# Keep radial grid circles at fixed radii but hide their labels.
ax.axes.get_yaxis().set_ticks([8., 9., 10., 11.])
ax.axes.yaxis.set_ticklabels([])
ax.tick_params(axis='x', labelrotation=270)
# Use custom colors and opacity
for r, bar in zip(radii, bars):
bar.set_facecolor(plt.cm.viridis(r / 10.))
bar.set_alpha(0.8)
plt.show()
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) Input In [390], in <cell line: 16>() 13 radii = (counts / np.max(counts)) * max_height 14 width = (2*np.pi) / N ---> 16 ax = plt.subplot(111, polar=True, figure = fig) 17 bars = ax.bar(theta, radii, width=width, bottom=bottom) 18 #ax.xaxis("off") 19 #ax.axes.get_yaxis().set_visible(False) File ~/apps/miniconda3/envs/tutorial/lib/python3.10/site-packages/matplotlib/pyplot.py:1283, in subplot(*args, **kwargs) 1280 break 1281 else: 1282 # we have exhausted the known Axes and none match, make a new one! -> 1283 ax = fig.add_subplot(*args, **kwargs) 1285 fig.sca(ax) 1287 bbox = ax.bbox File ~/apps/miniconda3/envs/tutorial/lib/python3.10/site-packages/matplotlib/figure.py:752, in FigureBase.add_subplot(self, *args, **kwargs) 648 """ 649 Add an `~.axes.Axes` to the figure as part of a subplot arrangement. 650 (...) 747 fig.add_subplot(ax1) # add ax1 back to the figure 748 """ 749 if 'figure' in kwargs: 750 # Axes itself allows for a 'figure' kwarg, but since we want to 751 # bind the created Axes to self, it is not allowed here. --> 752 raise TypeError( 753 "add_subplot() got an unexpected keyword argument 'figure'") 755 if len(args) == 1 and isinstance(args[0], SubplotBase): 756 ax = args[0] TypeError: add_subplot() got an unexpected keyword argument 'figure'
# Polar histogram of the pooled orientation angles.
N = 80            # number of angular bins/bars
bottom = 8        # inner radius offset of the bars
max_height = 4    # radial span of the tallest bar

# Create the polar axes directly via subplot_kw.
fig, ax = plt.subplots(subplot_kw={'projection': 'polar'}, figsize=(15, 15))

# Histogram the angles over the full circle; bar angles are bin centers,
# bar heights are counts normalized to max_height.
counts, edges = np.histogram(angles, range=[0., np.pi * 2.], bins=80)
theta = (edges[1:] + edges[:-1]) / 2.
radii = (counts / np.max(counts)) * max_height
width = (2 * np.pi) / N

bars = ax.bar(theta, radii, width=width, bottom=bottom)

# Keep radial grid circles at fixed radii but hide their labels.
ax.axes.get_yaxis().set_ticks([8., 9., 10., 11.])
ax.axes.yaxis.set_ticklabels([])
ax.tick_params(axis='x', labelrotation=270)

# Color each bar by its (scaled) height.
for r, bar in zip(radii, bars):
    bar.set_facecolor(plt.cm.viridis(r / 10.))
    bar.set_alpha(0.8)

plt.show()
Lipid Distance Matrix¶
6.1) Reinitialize Data¶
# Reload the trajectory for the lipid distance-matrix analysis.
u = mda.Universe('reimage/image.tpr', 'reimage/reimaged.xtc')

# Head-group beads for each lipid species.
selection = ("(resname POPC and name PO4 NC3) or "
             "(resname POPS and name PO4 CNO) or "
             "(resname POP2 and name P1 P2 C1 C2 C3 PO4) or "
             "(resname CHOL)")
head_group = u.select_atoms(selection)

# All lipids in the bilayer.
bilayer = u.select_atoms("resname POPC or resname POPS or "
                         "resname POP2 or resname CHOL")
6.2) Collect Lipid Head Center of Mass Data¶
# Per-residue head-group center-of-mass trajectories for each lipid
# species, centered on the bilayer COM each frame.
# NOTE(review): frame_skip and track_divisor are defined in earlier cells;
# the 0-100% progress indicator assumes their values are consistent with
# the sampled frame count — confirm.
lipid_head_com = {}
for lip in ["POPC", "POPS", "POP2", "CHOL"]:
    # Pre-allocate (n_sampled_frames, n_residues, 3).
    # BUG FIX: removed the dead statement "lipid_head_com[lip] = np.empty"
    # (it assigned the np.empty function object, instantly overwritten).
    lipid_sele = head_group.select_atoms("resname " + lip)
    lipid_head_com[lip] = np.empty((int(u.trajectory.n_frames / frame_skip) + 1,
                                    lipid_sele.n_residues,
                                    3))
    ii = 0
    for ts in u.trajectory[::frame_skip]:
        # Check progress every 10%.
        if ii % track_divisor == 0:
            print(int(((ii / track_divisor)) * 10), "% ", sep = '', end = '')
        # Center coordinates on the bilayer COM before per-residue COMs.
        lipid_sele.positions = lipid_sele.positions - bilayer.center_of_mass()
        lipid_head_com[lip][ii, :] = lipid_sele.center_of_mass(compound = 'residues')
        ii += 1
    print(' ')
0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
6.3) Get Upper Leaflet Head Group COMs¶
# Keep only upper-leaflet lipids: for each species, split the two leaflets
# by radial distance from the vesicle center (frame 0) and retain the
# outer population for all frames.
for lip in ["POPC", "POPS", "POP2", "CHOL"]:
    head_com = lipid_head_com[lip]
    # Distance of each head-group COM from the vesicle center (frame 0).
    dist_from_center = np.sqrt(np.sum(head_com[0, :, :] * head_com[0, :, :], axis = 1))
    # The mean radius separates the two leaflet distributions.
    # BUG FIX: removed a redundant recomputation of dist_from_center that
    # duplicated the line above verbatim.
    upper_leaf_min = np.mean(dist_from_center)
    upper_selection = dist_from_center > upper_leaf_min
    # Convert the boolean mask to integer indices.
    upper_selection = [ii for ii in range(len(upper_selection)) if upper_selection[ii]]
    # Define the upper-leaflet lipids across every sampled frame.
    lipid_head_com[lip] = head_com[:, upper_selection, :]
6.4) Function to Cluster a Matrix Based on Row/Column Similarity¶
import scipy
import scipy.cluster.hierarchy as sch
def cluster_corr(corr_array, inplace=False):
    """
    Rearrange the correlation matrix ``corr_array`` so that groups of
    highly correlated variables sit next to each other.

    Parameters
    ----------
    corr_array : pandas.DataFrame or numpy.ndarray
        An NxN correlation matrix.
    inplace : bool, optional
        When False (default), the input is copied before reordering.

    Returns
    -------
    pandas.DataFrame or numpy.ndarray
        The NxN correlation matrix with rows and columns rearranged.
    """
    # Hierarchically cluster the rows (complete linkage) and cut the tree
    # at half the maximum pairwise distance.
    distances = sch.distance.pdist(corr_array)
    tree = sch.linkage(distances, method='complete')
    labels = sch.fcluster(tree, distances.max() / 2,
                          criterion='distance')
    # Sorting by cluster label groups correlated variables together.
    order = np.argsort(labels)

    if not inplace:
        corr_array = corr_array.copy()

    if isinstance(corr_array, pd.DataFrame):
        # Reorder rows, then (via the transpose) columns, by the same index.
        return corr_array.iloc[order, :].T.iloc[order, :]
    return corr_array[order, :][:, order]
6.5) Calculate and Plot Distance Matrices for First and Last Frames¶
# Select lipid to perform calculation for
lip = "POP2"

# Pairwise arc-length matrices for the first and last sampled frames,
# clustered by similarity and shown side by side.
fig, axs = plt.subplots(1, 2, figsize = (12, 5))
jj = 0
for ii in [0, -1]:
    # Number of upper-leaflet lipids of this species.
    # GENERALIZED: was hard-coded to 582 (the POP2 upper-leaflet count),
    # which silently broke for other species; now derived from the data.
    n_lip = lipid_head_com[lip].shape[1]
    # Indices for every (lipid, lipid) combination.
    axis_1 = np.arange(n_lip, dtype = int)
    index_1, index_2 = np.array(np.meshgrid(axis_1, axis_1))
    x = index_1.flatten()
    y = index_2.flatten()
    # Collect the lipid coordinates for the frame.
    frame = lipid_head_com[lip][ii, :, :]
    # Arc length between all lipid pairs: angle between position vectors
    # (dot product / norms) times the mean upper-leaflet radius.
    # NOTE: rounding can push the cosine slightly outside [-1, 1], which
    # makes arccos emit an "invalid value" warning; NaNs are zeroed below.
    angles = np.sum(frame[x, :] * frame[y, :], axis = 1)
    angles = angles / \
             (np.linalg.norm(frame[x, :], axis = 1) * \
              np.linalg.norm(frame[y, :], axis = 1))
    angles = np.arccos(angles)
    arclengths = angles * average_upper_radius
    # Reshape arc lengths into a matrix and build grid coordinates.
    z = arclengths.reshape(n_lip, n_lip)
    z = np.nan_to_num(z)
    x = x.reshape(n_lip, n_lip)
    y = y.reshape(n_lip, n_lip)
    # Cluster matrix based on similarity.
    z = cluster_corr(z)
    # Plot results: highlight pairs within 50 (same units as arclengths).
    axs[jj].contourf(x, y, z, levels = [0, 50.], cmap = "viridis")
    jj += 1
plt.show()
/tmp/ipykernel_7545/3285562180.py:29: RuntimeWarning: invalid value encountered in arccos angles = np.arccos(angles)
Lipid Contacts¶
Reinitialize the Data¶
# Reload the trajectory for the lipid-contact analysis.
u = mda.Universe('reimage/image.tpr', 'reimage/reimaged.xtc')

# Protein atoms: everything that is not one of the four lipid species.
selection = ("(not resname POPC) and "
             "(not resname POPS) and "
             "(not resname POP2) and "
             "(not resname CHOL)")
protein = u.select_atoms(selection)

# Bilayer atoms: all four lipid species.
selection = ("resname POPC or resname POPS or "
             "resname POP2 or resname CHOL")
bilayer = u.select_atoms(selection)
Define Functions¶
- Generate the pairwise combinations of a given list-like.
- Defines a ufunc to convert an array of 3-letter amino acid codes to an array of 1-letter codes.
def numpy_pairwise_combinations(x):
    """Return all pairwise combinations (with repetition) of *x*.

    Uses the upper-triangular indices of a len(x) x len(x) grid with
    k=0, so the diagonal is kept and self-pairs (i, i) are included.
    Result shape: (n_pairs, 2).
    """
    rows, cols = np.triu_indices(len(x), k=0)
    pair_idx = np.stack((rows, cols), axis=-1)
    # Fancy-index to map index pairs onto element pairs.
    return x[pair_idx]
convert_aa = np.frompyfunc(mda.lib.util.convert_aa_code, 1, 1)
Calculate Pairwise Distances¶
%%time
# Used to collect all of the information about each
# contact
# NOTE(review): contact_data is initialized here but never filled in this
# cell — possibly a leftover from a planned per-contact breakdown.
contact_data = {"Distance" : [], "Partner_1" : [],
"Partner_2" : [], "Contact" : []}
# Mean number of POP2 head-group neighbors within 20 Å, one value per
# sampled frame.
avg_contacts = []
ii = 0
for ts in u.trajectory[::100]:
# NOTE(review): indexing lipid_head_com by a counter that advances once
# per 100-frame step assumes it was built with frame_skip == 100 — confirm.
test_data = lipid_head_com["POP2"][ii, :, :]
test_data
# All COM pairs closer than 20 Å: (pair indices, distances).
search_results = mda.lib.distances.self_capped_distance(test_data, 20.0)
# Occurrences of each lipid index among the contact pairs.
counts = np.unique(search_results[0].flatten(), return_counts = True)[1]
# Pad to 582 lipids so contact-free lipids contribute zeros to the mean
# (ordering of the padding does not affect the mean).
counts = np.concatenate([np.zeros(582 - len(counts)), counts])
avg_contacts.append(np.mean(counts))
ii += 1
CPU times: user 12.9 s, sys: 115 ms, total: 13 s Wall time: 13 s
# Plot the per-frame average contact count with a linear trendline.
# NOTE(review): time has one entry per trajectory frame while avg_contacts
# has one entry per 100th frame — confirm the arrays are the same length
# before plotting/regressing.
# Initialize plotting.
fig, ax = plt.subplots(figsize = (15, 4))
# Remove top and right spines from the figure.
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
# ---
ax.plot(time, avg_contacts) # Plot data
# ---
ax.set_xlabel("Time (us)") # Set x label
# BUG FIX: y label contained mojibake "Ã…" (UTF-8 Å decoded as Latin-1);
# restored to "Å".  NOTE(review): the data are mean neighbor counts, not
# distances — consider relabeling.
ax.set_ylabel("Avg Pairwise Dist (Å)") # Set y label
# Calculate equation for trendline
z = np.polyfit(time, avg_contacts, 1)
p = np.poly1d(z)
slope, intercept, r_value, p_value, std_err = \
scipy.stats.linregress(time, avg_contacts)
# Add trendline to plot
ax.plot(time, p(time), linestyle = "dashed",color = "orange")
#ax.annotate("R² = " + str(round(r_value, 2)), xy = (0., 3.),
#            xytext = (0., 3.))
plt.show()